keras-文本图片文字识别
1. Keras环境安装
参考 Keras 官方安装文档完成安装(原文此处的链接已丢失)。
2. 文本图片素材-文字切割并保存切割图片
# -*- coding: UTF-8 -*-
import cv2
import numpy as np
try:
    import matplotlib.pyplot as plt
except ImportError:
    # The projection plot drawn below is only a debugging aid (it is never
    # shown), so segmentation keeps working when matplotlib is missing.
    plt = None


def median_split_ranges(peek_ranges):
    """Split ranges that are several characters wide into per-character ranges.

    The width of every range is computed as ``end - start + 1`` and compared
    with the median width.  A range whose width rounds to ``n > 1`` median
    widths is cut into ``n`` equal pieces; every other range is kept as is.

    Args:
        peek_ranges: sequence of ``(start, end)`` pairs.

    Returns:
        A new list of ``(start, end)`` tuples, in the original order.
    """
    # len() instead of truthiness so that a NumPy array is accepted as well;
    # this also avoids taking the median of an empty array.
    if len(peek_ranges) == 0:
        return []

    widths = np.asarray([r[1] - r[0] + 1 for r in peek_ranges])
    median_w = np.median(widths)

    new_peek_ranges = []
    for peek_range, width in zip(peek_ranges, widths):
        num_char = int(round(width / median_w, 0))
        if num_char > 1:
            char_w = float(width / num_char)
            # A separate index name: the original reused the outer loop index.
            for k in range(num_char):
                start_point = peek_range[0] + int(k * char_w)
                end_point = peek_range[0] + int((k + 1) * char_w)
                new_peek_ranges.append((start_point, end_point))
        else:
            new_peek_ranges.append(peek_range)
    return new_peek_ranges


def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2):
    """Find the index ranges in which the values stay above a threshold.

    Args:
        array_vals: one-dimensional sequence of numbers (a projection profile).
        minimun_val: a value belongs to a peak only if it is strictly greater
            than this threshold.
        minimun_range: minimum length (``end - start``) for a peak to be kept.

    Returns:
        List of ``(start, end)`` tuples, where ``start`` is the first index
        above the threshold and ``end`` is the first following index that is
        not above it (so ``end`` is exclusive).
    """
    # NOTE: a peak that is still open when the array ends is not reported,
    # exactly as in the original implementation.
    start_i = None
    peek_ranges = []
    for i, val in enumerate(array_vals):
        if val > minimun_val:
            if start_i is None:
                start_i = i
        elif start_i is not None:
            # A value equal to the threshold used to raise ValueError; it is
            # now treated like any other value that is not above it.
            if i - start_i >= minimun_range:
                peek_ranges.append((start_i, i))
            start_i = None
    return peek_ranges


def get_font_face_peek_ranges(path_test_image):
    """Segment a text image into rows and per-row character column ranges.

    The image is enlarged to twice its size, binarised with an inverted
    adaptive threshold, and then projected onto the y axis (to find text rows)
    and, per row, onto the x axis (to find characters).

    Args:
        path_test_image: path of the image file to read with ``cv2.imread``.

    Returns:
        A tuple ``(peek_ranges, vertical_peek_ranges2d, image_color)``:
        the row ranges, one list of column ranges per row, and the enlarged
        colour image.  All coordinates refer to the enlarged image.

    Raises:
        OSError: if the image cannot be read.
    """
    image_color = cv2.imread(path_test_image)
    if image_color is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("cannot read image: %r" % (path_test_image,))

    # cv2.resize expects (width, height) while shape is (height, width, ...).
    new_shape = (image_color.shape[1] * 2, image_color.shape[0] * 2)
    image_color = cv2.resize(image_color, new_shape)
    image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
    adaptive_threshold = cv2.adaptiveThreshold(
        image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

    horizontal_sum = np.sum(adaptive_threshold, axis=1)
    if plt is not None:
        # Debug plot of the row projection; call plt.show() to display it.
        plt.plot(horizontal_sum, range(horizontal_sum.shape[0]))
        plt.gca().invert_yaxis()

    peek_ranges = extract_peek_ranges_from_array(horizontal_sum)

    # The original ran this loop twice and threw the first result away; only
    # the pass that applies median_split_ranges is kept.
    vertical_peek_ranges2d = []
    for peek_range in peek_ranges:
        start_y = peek_range[0]
        end_y = peek_range[1]
        line_img = adaptive_threshold[start_y:end_y, :]
        vertical_sum = np.sum(line_img, axis=0)
        vertical_peek_ranges = extract_peek_ranges_from_array(
            vertical_sum, minimun_val=40, minimun_range=1)
        vertical_peek_ranges = median_split_ranges(vertical_peek_ranges)
        vertical_peek_ranges2d.append(vertical_peek_ranges)
    return peek_ranges, vertical_peek_ranges2d, image_color


# Colour used when drawing the character boxes (OpenCV images are BGR).
color = (0, 0, 255)
# Demo: segment the sample image and draw a box around every character.
path_test_image = "tmp/font.png"
peek_ranges, vertical_peek_ranges2d, image_color = get_font_face_peek_ranges(path_test_image)
for i, peek_range in enumerate(peek_ranges):
    for (j, vertical_range) in enumerate(vertical_peek_ranges2d[i]):
        x = vertical_range[0]
        y = peek_range[0]
        w = vertical_range[1] - x
        h = peek_range[1] - y
        # Character crop with a 2-pixel margin.  The start indices are clamped
        # at 0: a negative start would make the slice wrap around and come out
        # empty or wrong for characters touching the top/left border.
        # NOTE(review): the crop is computed but not saved or used here.
        image = image_color[max(y - 2, 0):y + h + 2, max(x - 2, 0):x + w + 2]
        pt1 = (x, y)
        pt2 = (x + w, y + h)
        cv2.rectangle(image_color, pt1, pt2, color)

# Show the annotated image and block until a key is pressed.
cv2.imshow('image', image_color)
cv2.waitKey(0)
3. 训练
# NOTE(review): this snippet relies on names that are not imported in the
# visible text (read_file, train_test_split, np_utils, Sequential,
# Convolution2D, Activation, MaxPooling2D, Flatten, Dense) -- confirm the
# imports exist where this script actually lives.
FILE_PATH = "model.h5"  # where the trained model is stored and read back from
IMAGE_SIZE = 128
PATH = "fonts"

# read_file is defined elsewhere; `counter` is used below as the class count.
imgs, labels, counter = read_file(PATH, IMAGE_SIZE)
X_train, X_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.2, random_state=0)

# Reshape to (samples, 1, IMAGE_SIZE, IMAGE_SIZE) and scale pixels by 1/255.
X_train = X_train.reshape(X_train.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0
X_test = X_test.reshape(X_test.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# One-hot encode the labels.
Y_train = np_utils.to_categorical(y_train, num_classes=counter)
Y_test = np_utils.to_categorical(y_test, num_classes=counter)

model = Sequential()
# NOTE(review): `dim_ordering='th'` is the Keras 1 spelling of
# `data_format='channels_first'`, and the later layers do not set a data
# format at all -- confirm against the Keras version/backend in use.
model.add(Convolution2D(
    filters=32,
    kernel_size=(5, 5),
    padding='same',
    dim_ordering='th',
    input_shape=X_train.shape[1:],
))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

model.add(Convolution2D(filters=64, kernel_size=(5, 5), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))

# One output unit per class, turned into probabilities by softmax.
model.add(Dense(counter))
model.add(Activation('softmax'))
model.summary()

# The original call passed a stray positional `optimizer` after the keyword
# argument, which is a syntax error; 'adam' is the optimizer that was named.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=32, batch_size=32)

loss, accuracy = model.evaluate(X_test, Y_test)
print('test loss;', loss)
print('test accuracy:', accuracy)

model.save(FILE_PATH)
4. 识别文字图片
a. 图片文字切割
b. 文字识别
# -*- coding: UTF-8 -*-
import os

import cv2
import numpy as np
from keras.models import load_model

import utils


def getLetter(model, img, name_list, IMAGE_SIZE):
    """Classify one character crop and return its label and probability.

    Args:
        model: loaded Keras model whose output is indexed by class.
        img: BGR crop of a single character.
        name_list: sequence mapping a class index to its label.
        IMAGE_SIZE: side length the crop is resized to before prediction.

    Returns:
        A tuple ``(label, probability)`` for the highest-scoring class.
    """
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Batch of one single-channel image, scaled by 1/255.
    img = img.reshape((1, 1, IMAGE_SIZE, IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0

    # predict() returns the same array predict_proba() did and is available
    # in every Keras version (predict_proba was removed from newer releases).
    result = model.predict(img)
    max_index = np.argmax(result)
    # The original had an `else: return ""` branch for index -1; argmax never
    # yields -1, and a bare "" would have broken the caller's two-value
    # unpacking, so the dead branch is dropped.
    return name_list[max_index], result[0][max_index]


FILE_PATH = "model.h5"
IMAGE_SIZE = 128
result = ""
path_test_image = "tmp/font.png"
# NOTE(review): readName is not defined in the visible text -- confirm where
# it comes from (it is expected to return the class-index -> label list).
name_list = readName()

model = load_model(FILE_PATH)
peek_ranges, vertical_peek_ranges2d, image_color = utils.get_font_face_peek_ranges(path_test_image)
for i, peek_range in enumerate(peek_ranges):
    for (j, vertical_range) in enumerate(vertical_peek_ranges2d[i]):
        x = vertical_range[0]
        y = peek_range[0]
        w = vertical_range[1] - x
        h = peek_range[1] - y
        # Crop with a 2-pixel margin; start indices are clamped at 0 because a
        # negative start would wrap around and hand an empty or wrong crop to
        # the classifier.
        image = image_color[max(y - 2, 0):y + h + 2, max(x - 2, 0):x + w + 2]
        letter, prob = getLetter(model, image, name_list, IMAGE_SIZE)
        result += letter
print(result)
c. 测试结果(还需优化)
keras-文本图片文字识别相关推荐
- 2021-02-21 Python Easyocr 图片文字识别
Python Easyocr 图片文字识别 前段时间做了车牌识别相关的内容分享,参看: 车牌识别(1)-车牌数据集生成 车牌识别(2)-搭建车牌识别模型 今天给大家分享一个简单的OCR文本识别工具:e ...
- 吴恩达《Machine Learning》精炼笔记 12:大规模机器学习和图片文字识别 OCR
作者 | Peter 编辑 | AI有道 系列文章: 吴恩达<Machine Learning>精炼笔记 1:监督学习与非监督学习 吴恩达<Machine Learning>精 ...
- 图片文字识别(一):tesseract-ocr-4.00的安装与初步进行图片文字识别
简介: tesseract-ocr可以对图像文字进行识别,为图文转换的工作时省去了大量时间.我们还可以通过不断的训练字库,使图像转换文本的能力不断增强,也可以调试模型使图像文字进行程序的识别率更高, ...
- 吴恩达《机器学习》第十八章:图片文字识别OCR
文章目录 十八.应用实例:图片文字识别OCR 18.1 问题描述和流程图 18.2 滑动窗口 18.3 获取大量数据和人工数据 18.4 上限分析:下一步工作 十八.应用实例:图片文字识别OCR 18 ...
- Python图片文字识别——Windows下Tesseract-OCR的安装与使用
Python图片文字识别--Windows下Tesseract-OCR的安装与使用 前言 Windows下Tesseract-OCR的安装与配置 Tesseract-OCR简介与版本选择 tesser ...
- Android 图片文字识别DEMO(基于百度OCR)
前言 OCR 是 Optical Character Recognition 的缩写,翻译为光学字符识别,指的是针对印刷体字符,采用光学的方式将纸质文档中的文字转换成为黑白点阵的图像文件,通过识别 ...
- python存数据库c读数据库喷码加工_python图片文字识别
Python语言读取Marc后处理文件基础知识_材料科学_工程科技_专业资料.Python语言简介,Marc计算结果文件读取,焊接模拟后处理实例 基于python 的焊接后处理知识要点: ? ?... ...
- 吴恩达机器学习(十五)—— 应用实例:图片文字识别
应用实例:图片文字识别 1. 问题描述和流水线 2. 滑动窗口 3. 获取大量数据:人工数据合成 4. 上限分析:流水线的哪个模块最有改进价值 学习图片文字识别的应用实例要做的事情: 展示一个复杂 ...
- Python3 图片文字识别翻译——调用百度AI、百度翻译和有道翻译的API
文章目录 Python3 图片文字识别翻译--调用百度AI.百度翻译和有道翻译的API 一.演示 二. API准备 三. 图片文字识别--调用百度AI文字识别API 四. 文字翻译 1. 百度翻译 请 ...
- 电脑端怎样具体操作图片文字识别?
办公室职员最离不开的就是电脑了,那么大家平时会不会操作图片文字间的转换呢?并且是电脑端的图片文字识别.如果不会的话,可以看看今天小编的分享哈. 图片转文字的具体操作: 第一步:打开OCR文字识别软件, ...
最新文章
- Visual Studio插件
- leetcode:242 : 有效的字母异位词
- Python 中的万能之王 Lambda 函数
- md5 算法java实现_java实现MD5算法
- 【Unity】4.5 树木创建器
- 【图像边缘检测】基于matlab GUI神经网络算法边缘检测(带面板)【含Matlab源码 1346期】
- cv个人计算机SCI英文简历模板,个人英语简历模板|英文简历模板pdf百度云
- SRE 到底是什么?
- 《富爸爸穷爸爸》读书摘要
- Microsoft Teams管理(一)
- 3P(PS、PR、PDF编辑器Acrobat)中的基基本操作(一)
- 什么是淘宝私域流量?和公域流量流量有何区别?
- 高频交易配对交易学习——Copulas函数理解
- MADlib——基于SQL的数据挖掘解决方案(23)——分类之SVM
- (5)Linux基础——opendir/closedir 、readdir、mkdir 、rmdir、getcwd、chdir详细含义用法及介绍(基础)
- 课5 视频分镜的处理
- 动态内表的俩栗子_SAP刘梦_新浪博客
- 托福百日冲刺(五一记忆)(1)
- 【CAD】通过VBA获取CAD中的文本
- CSGO服务器租用如何选择合适的配置?CSGO服务器怎么选择?