
# - Use OpenCV's EAST text detector to locate the text regions in an image.
# - Extract each text ROI, then recognize its text with OpenCV and Tesseract v4.


import functools
import os
import time

import cv2
import numpy as np
import pytesseract
from imutils.object_detection import non_max_suppression
from matplotlib import pyplot as plt
from PIL import Image
# Path of the pre-trained EAST text detection graph (relative to the working directory).
EAST_MODEL_PATH = "frozen_east_text_detection.pb"

# Width and height the frame is resized to before detection; both are multiples of 32.
EAST_INPUT_SIZE = (320, 320)

# Output layers of the EAST model: the first yields the text probabilities,
# the second the geometry used to derive the bounding boxes.
EAST_OUTPUT_LAYERS = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# Tesseract options: English language, OEM 1 (LSTM neural-net engine) and
# PSM 7 (treat every ROI as a single line of text).
TESSERACT_CONFIG = "-l eng --oem 1 --psm 7"  # chi_sim


def decode_predictions(scores, geometry, min_confidence=0.5):
    """Turn raw EAST detector output into bounding boxes and confidences.

    Args:
        scores: Score volume of the detector; ``scores[0, 0, y, x]`` is the
            text probability of feature-map cell ``(x, y)``.
        geometry: Geometry volume of the detector. For a cell ``(x, y)``,
            channels 0 and 2 add up to the box height, channels 1 and 3 to
            the box width, and channel 4 holds the rotation angle.
        min_confidence: Minimum probability a cell needs to be kept.

    Returns:
        A tuple ``(rects, confidences)`` where ``rects`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples in the coordinates
        of the resized detector input and ``confidences`` holds the
        matching scores in the same order.
    """
    num_rows, num_cols = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(num_rows):
        # Per-row views of the probabilities and of the geometry channels.
        scores_data = scores[0, 0, y]
        x_data0 = geometry[0, 0, y]
        x_data1 = geometry[0, 1, y]
        x_data2 = geometry[0, 2, y]
        x_data3 = geometry[0, 3, y]
        angles_data = geometry[0, 4, y]

        for x in range(num_cols):
            if scores_data[x] < min_confidence:
                continue

            # The feature maps are 4x smaller than the detector input.
            offset_x, offset_y = x * 4.0, y * 4.0

            angle = angles_data[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            box_h = x_data0[x] + x_data2[x]
            box_w = x_data1[x] + x_data3[x]

            end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
            end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
            start_x = int(end_x - box_w)
            start_y = int(end_y - box_h)

            rects.append((start_x, start_y, end_x, end_y))
            confidences.append(scores_data[x])

    return (rects, confidences)


@functools.lru_cache(maxsize=1)
def _load_east_net(model_path):
    """Load the EAST network once and reuse it for every following frame."""
    print("[INFO] loading EAST text detector...")
    return cv2.dnn.readNet(model_path)


def text_recognition(image, padding=0.0):
    """Detect text regions in ``image``, OCR them and show the annotated frame.

    The frame is resized to ``EAST_INPUT_SIZE`` for detection, the detected
    boxes are scaled back to the original frame, every box is passed to
    Tesseract, and the frame with boxes and recognized text drawn on it is
    shown in the "Text Detection" window.

    Args:
        image: Frame as returned by ``cv2.VideoCapture.read``.
        padding: Optional fraction of the box size added around each ROI
            before OCR. Try 0.05 for 5% or 0.10 for 10% (and so on) when
            the OCR result is incorrect.

    Returns:
        A list of ``((startX, startY, endX, endY), text)`` tuples sorted
        from top to bottom, in coordinates of the original frame.
    """
    orig = image.copy()
    orig_h, orig_w = image.shape[:2]

    # Ratios used to scale the detected boxes back to the original frame.
    new_w, new_h = EAST_INPUT_SIZE
    ratio_w = orig_w / float(new_w)
    ratio_h = orig_h / float(new_h)

    resized = cv2.resize(image, (new_w, new_h))
    (height, width) = resized.shape[:2]

    net = _load_east_net(EAST_MODEL_PATH)

    # Build a blob from the frame and run a forward pass to obtain the
    # two output layer sets.
    blob = cv2.dnn.blobFromImage(
        resized, 1.0, (width, height),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(EAST_OUTPUT_LAYERS)
    end = time.time()
    print("[INFO] text detection cost {:.6f} seconds".format(end - start))

    # Non-maxima suppression keeps the most likely text regions and drops
    # the weaker boxes overlapping them.
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    results = []
    for (start_x, start_y, end_x, end_y) in boxes:
        # Scale the box back to the original frame.
        start_x = int(start_x * ratio_w)
        start_y = int(start_y * ratio_h)
        end_x = int(end_x * ratio_w)
        end_y = int(end_y * ratio_h)

        delta_x = int((end_x - start_x) * padding)
        delta_y = int((end_y - start_y) * padding)

        # Clamp every corner into the frame: a negative end coordinate
        # would otherwise be interpreted as a from-the-end slice index.
        start_x = min(max(0, start_x - delta_x), orig_w)
        start_y = min(max(0, start_y - delta_y), orig_h)
        end_x = min(max(0, end_x + (delta_x * 2)), orig_w)
        end_y = min(max(0, end_y + (delta_y * 2)), orig_h)
        if end_x <= start_x or end_y <= start_y:
            # Nothing of this box is left inside the frame.
            continue

        roi = orig[start_y:end_y, start_x:end_x]
        text = pytesseract.image_to_string(roi, config=TESSERACT_CONFIG)
        results.append(((start_x, start_y, end_x, end_y), text))

    # Report the boxes from top to bottom.
    results.sort(key=lambda r: r[0][1])

    output = orig.copy()
    for ((start_x, start_y, end_x, end_y), text) in results:
        print("OCR TEXT")
        print("========")
        print("{}\n".format(text))

        # Strip non-ASCII characters so the text can be drawn with OpenCV.
        label = "".join(c for c in text if ord(c) < 128).strip()
        cv2.rectangle(output, (start_x, start_y), (end_x, end_y),
                      (0, 0, 255), 2)
        cv2.putText(output, label, (start_x, start_y - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

    cv2.imshow("Text Detection", output)
    return results


def main():
    """Run text recognition on frames of the default camera until "q" is pressed."""
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FPS, 15)
    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                # Without a frame there is nothing to process.
                print("[INFO] could not read a frame from the camera, stopping")
                break
            text_recognition(frame)
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
    finally:
        # Free the camera and close the preview window on every exit path.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()


