1. Keras环境安装

## 参考 Keras 官方安装文档完成环境安装(原文此处的“点击打开链接”为超链接,链接地址在转载时已丢失)

2. 文本图片素材-文字切割并保存切割图片

# -*- coding: UTF-8 -*-
import cv2
import numpy as np
def median_split_ranges(peek_ranges):
    """Split ranges that are a multiple of the median width into equal parts.

    Each range is an inclusive-style ``(start, end)`` pair whose width is
    taken as ``end - start + 1``.  A range whose width, divided by the median
    width of all ranges, rounds to ``n > 1`` is cut into ``n`` consecutive
    sub-ranges of equal (fractional) width; every other range is passed
    through unchanged.

    Args:
        peek_ranges: sequence of ``(start, end)`` pairs.

    Returns:
        A new list of ``(start, end)`` tuples, in the original order.
    """
    # An empty input has no median; return early instead of letting
    # np.median produce NaN and a RuntimeWarning.
    if len(peek_ranges) == 0:
        return []

    widths = np.asarray([r[1] - r[0] + 1 for r in peek_ranges])
    median_w = np.median(widths)

    new_peek_ranges = []
    for peek_range, width in zip(peek_ranges, widths):
        num_char = int(round(width / median_w, 0))
        if num_char > 1:
            char_w = float(width) / num_char
            # Separate loop variable: the original reused the outer index here.
            for k in range(num_char):
                start_point = peek_range[0] + int(k * char_w)
                end_point = peek_range[0] + int((k + 1) * char_w)
                new_peek_ranges.append((start_point, end_point))
        else:
            new_peek_ranges.append(peek_range)
    return new_peek_ranges


def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2):
    """Find runs of consecutive values strictly greater than a threshold.

    Args:
        array_vals: 1-D iterable of numbers (e.g. a projection profile).
        minimun_val: a value belongs to a run only if it is ``> minimun_val``.
            Values equal to the threshold count as background (previously
            they raised ``ValueError``).
        minimun_range: minimum ``end - start`` for a run to be reported.

    Returns:
        List of ``(start, end)`` index tuples, where ``start`` is the first
        index inside the run and ``end`` is the first index after it.  A run
        that is still open when the array ends is not reported.
    """
    peek_ranges = []
    start_i = None
    for i, val in enumerate(array_vals):
        if val > minimun_val:
            if start_i is None:
                start_i = i
        elif start_i is not None:
            # First value at/below the threshold closes the current run.
            if i - start_i >= minimun_range:
                peek_ranges.append((start_i, i))
            start_i = None
    return peek_ranges


def get_font_face_peek_ranges(path_test_image):
    """Segment a text image into rows and, within each row, into glyph columns.

    The image is upscaled 2x, converted to grayscale and binarised with an
    inverted Gaussian adaptive threshold.  Rows are found from the per-row
    sums of the binary image; within each row, columns are found from the
    per-column sums and then refined with ``median_split_ranges``.

    Args:
        path_test_image: path of the image file passed to ``cv2.imread``.

    Returns:
        Tuple ``(peek_ranges, vertical_peek_ranges2d, image_color)``:
        the row ranges, one list of column ranges per row, and the 2x
        upscaled colour image the ranges refer to.

    Raises:
        IOError: if the image cannot be read.
    """
    # Imported here rather than at module level so that the pure-numpy
    # helpers above stay importable when matplotlib is not installed.
    import matplotlib.pyplot as plt

    image_color = cv2.imread(path_test_image)
    if image_color is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: %s" % path_test_image)

    # cv2.resize takes (width, height); shape is (height, width, ...).
    new_shape = (image_color.shape[1] * 2, image_color.shape[0] * 2)
    image_color = cv2.resize(image_color, new_shape)
    image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
    adaptive_threshold = cv2.adaptiveThreshold(
        image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

    horizontal_sum = np.sum(adaptive_threshold, axis=1)
    # Debug aid: draws the row profile on the current pyplot figure.  Nothing
    # is displayed unless the caller invokes plt.show() afterwards.
    plt.plot(horizontal_sum, range(horizontal_sum.shape[0]))
    plt.gca().invert_yaxis()
    # plt.show()

    peek_ranges = extract_peek_ranges_from_array(horizontal_sum)

    # The original ran this loop twice and threw the first result away; a
    # single pass produces the same output.
    vertical_peek_ranges2d = []
    for peek_range in peek_ranges:
        start_y = peek_range[0]
        end_y = peek_range[1]
        line_img = adaptive_threshold[start_y:end_y, :]
        vertical_sum = np.sum(line_img, axis=0)
        vertical_peek_ranges = extract_peek_ranges_from_array(
            vertical_sum, minimun_val=40, minimun_range=1)
        vertical_peek_ranges = median_split_ranges(vertical_peek_ranges)
        vertical_peek_ranges2d.append(vertical_peek_ranges)

    return peek_ranges, vertical_peek_ranges2d, image_color


# Rectangle colour used by the demo below (the image is loaded by cv2.imread).
color = (0, 0, 255)
# Demo: outline every detected glyph on the upscaled test image and show it.
path_test_image = "tmp/font.png"
peek_ranges, vertical_peek_ranges2d, image_color = get_font_face_peek_ranges(path_test_image)

for peek_range, vertical_ranges in zip(peek_ranges, vertical_peek_ranges2d):
    for vertical_range in vertical_ranges:
        # Corners come straight from the column range (x) and row range (y).
        # The original also cut an unused crop of each glyph here; that dead
        # code has been removed.
        pt1 = (vertical_range[0], peek_range[0])
        pt2 = (vertical_range[1], peek_range[1])
        cv2.rectangle(image_color, pt1, pt2, color)

cv2.imshow('image', image_color)
cv2.waitKey(0)

3. 训练

FILE_PATH = "model.h5"   # where the trained model is saved and later loaded from
IMAGE_SIZE = 128
PATH = "fonts"

# NOTE(review): read_file, train_test_split, np_utils, Sequential and the
# layer classes are not imported in the visible part of this file - confirm
# the imports exist before running.
imgs, labels, counter = read_file(PATH, IMAGE_SIZE)
X_train, X_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.2, random_state=0)

# Reshape to (samples, 1, IMAGE_SIZE, IMAGE_SIZE) and scale by 1/255.
X_train = X_train.reshape(X_train.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0
X_test = X_test.reshape(X_test.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# One-hot encode the labels; `counter` is used as the number of classes.
Y_train = np_utils.to_categorical(y_train, num_classes=counter)
Y_test = np_utils.to_categorical(y_test, num_classes=counter)

model = Sequential()
# NOTE(review): only this first layer requests the 'th' ordering; the later
# conv/pool layers use the backend default - confirm they agree.
model.add(Convolution2D(
    filters=32,
    kernel_size=(5, 5),
    padding='same',
    dim_ordering='th',
    input_shape=X_train.shape[1:],
))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

model.add(Convolution2D(filters=64, kernel_size=(5, 5), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))

# Output layer: one unit per class, softmax probabilities.
model.add(Dense(counter))
model.add(Activation('softmax'))
model.summary()

# Fixed: the original passed a stray positional `optimizer` after the
# `optimizer='adam'` keyword, which is a SyntaxError.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, Y_train, epochs=32, batch_size=32)

loss, accuracy = model.evaluate(X_test, Y_test)
print('test loss;', loss)
print('test accuracy:', accuracy)

model.save(FILE_PATH)

4. 识别文字图片

a. 图片文字切割

b. 文字识别

# -*- coding: UTF-8 -*-
from keras.models import load_model
import cv2
import numpy as np
import utils
import os


def getLetter(model, img, name_list, IMAGE_SIZE):
    """Classify one glyph crop and return its label and score.

    The crop is resized to ``IMAGE_SIZE x IMAGE_SIZE``, converted from BGR to
    grayscale, reshaped to ``(1, 1, IMAGE_SIZE, IMAGE_SIZE)``, cast to
    float32 and scaled by 1/255 before being passed to the model.

    Args:
        model: object with a ``predict`` method returning one row of
            per-class scores for the single input sample.
        img: colour image crop accepted by ``cv2.resize`` / ``cv2.cvtColor``.
        name_list: sequence mapping class index to label.
        IMAGE_SIZE: side length the crop is resized to.

    Returns:
        Tuple ``(label, score)`` for the highest-scoring class.
    """
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = img.reshape((1, 1, IMAGE_SIZE, IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0

    # `predict` replaces `predict_proba`, which newer Keras releases removed.
    result = model.predict(img)
    max_index = np.argmax(result)
    # The original guarded on `max_index != -1`, which argmax never returns,
    # and its unreachable fallback returned a bare "" that callers unpacking
    # a 2-tuple could not handle.  Always return (label, score).
    return name_list[max_index], result[0][max_index]


FILE_PATH = "model.h5"
IMAGE_SIZE = 128
result = ""
path_test_image = "tmp/font.png"
# NOTE(review): readName is not defined or imported in the visible part of
# this file - confirm where it comes from.
name_list = readName()

model = load_model(FILE_PATH)
peek_ranges, vertical_peek_ranges2d, image_color = utils.get_font_face_peek_ranges(path_test_image)

letters = []
for peek_range, vertical_ranges in zip(peek_ranges, vertical_peek_ranges2d):
    y = peek_range[0]
    h = peek_range[1] - y
    for vertical_range in vertical_ranges:
        x = vertical_range[0]
        w = vertical_range[1] - x
        # Crop with a 2-pixel margin.  The lower bounds are clamped at 0:
        # a negative start index would wrap around and give an empty crop,
        # which cv2.resize rejects.
        image = image_color[max(y - 2, 0):y + h + 2, max(x - 2, 0):x + w + 2]
        letter, _prob = getLetter(model, image, name_list, IMAGE_SIZE)
        letters.append(letter)

# Join once instead of growing the string inside the loop.
result = "".join(letters)
print(result)

c. 测试结果(还需优化)

keras-文本图片文字识别相关推荐

  1. 2021-02-21 Python Easyocr 图片文字识别

    Python Easyocr 图片文字识别 前段时间做了车牌识别相关的内容分享,参看: 车牌识别(1)-车牌数据集生成 车牌识别(2)-搭建车牌识别模型 今天给大家分享一个简单的OCR文本识别工具:e ...

  2. 吴恩达《Machine Learning》精炼笔记 12:大规模机器学习和图片文字识别 OCR

    作者 | Peter 编辑 | AI有道 系列文章: 吴恩达<Machine Learning>精炼笔记 1:监督学习与非监督学习 吴恩达<Machine Learning>精 ...

  3. 图片文字识别(一):tesseract-ocr-4.00的安装与初步进行图片文字识别

    简介: tesseract-ocr可以对图像文字进行识别,为图文转换的工作时省去了大量时间.我们还可以通过不断的训练字库,使图像转换文本的能力不断增强,也可以调试模型使图像文字进行程序的识别率更高, ...

  4. 吴恩达《机器学习》第十八章:图片文字识别OCR

    文章目录 十八.应用实例:图片文字识别OCR 18.1 问题描述和流程图 18.2 滑动窗口 18.3 获取大量数据和人工数据 18.4 上限分析:下一步工作 十八.应用实例:图片文字识别OCR 18 ...

  5. Python图片文字识别——Windows下Tesseract-OCR的安装与使用

    Python图片文字识别--Windows下Tesseract-OCR的安装与使用 前言 Windows下Tesseract-OCR的安装与配置 Tesseract-OCR简介与版本选择 tesser ...

  6. Android 图片文字识别DEMO(基于百度OCR)

    前言   OCR 是 Optical Character Recognition 的缩写,翻译为光学字符识别,指的是针对印刷体字符,采用光学的方式将纸质文档中的文字转换成为黑白点阵的图像文件,通过识别 ...

  7. python存数据库c读数据库喷码加工_python图片文字识别

    Python语言读取Marc后处理文件基础知识_材料科学_工程科技_专业资料.Python语言简介,Marc计算结果文件读取,焊接模拟后处理实例 基于python 的焊接后处理知识要点: ? ?... ...

  8. 吴恩达机器学习(十五)—— 应用实例:图片文字识别

    应用实例:图片文字识别 1. 问题描述和流水线 2. 滑动窗口 3. 获取大量数据:人工数据合成 4. 上限分析:流水线的哪个模块最有改进价值   学习图片文字识别的应用实例要做的事情: 展示一个复杂 ...

  9. Python3 图片文字识别翻译——调用百度AI、百度翻译和有道翻译的API

    文章目录 Python3 图片文字识别翻译--调用百度AI.百度翻译和有道翻译的API 一.演示 二. API准备 三. 图片文字识别--调用百度AI文字识别API 四. 文字翻译 1. 百度翻译 请 ...

  10. 电脑端怎样具体操作图片文字识别?

    办公室职员最离不开的就是电脑了,那么大家平时会不会操作图片文字间的转换呢?并且是电脑端的图片文字识别.如果不会的话,可以看看今天小编的分享哈. 图片转文字的具体操作: 第一步:打开OCR文字识别软件, ...

最新文章

  1. Visual Studio插件
  2. leetcode:242 : 有效的字母异位词
  3. Python 中的万能之王 Lambda 函数
  4. md5 算法java实现_java实现MD5算法
  5. 【Unity】4.5 树木创建器
  6. 【图像边缘检测】基于matlab GUI神经网络算法边缘检测(带面板)【含Matlab源码 1346期】
  7. cv个人计算机SCI英文简历模板,个人英语简历模板|英文简历模板pdf百度云
  8. SRE 到底是什么?
  9. 《富爸爸穷爸爸》读书摘要
  10. Microsoft Teams管理(一)
  11. 3P(PS、PR、PDF编辑器Acrobat)中的基基本操作(一)
  12. 什么是淘宝私域流量?和公域流量流量有何区别?
  13. 高频交易配对交易学习——Copulas函数理解
  14. MADlib——基于SQL的数据挖掘解决方案(23)——分类之SVM
  15. (5)Linux基础——opendir/closedir 、readdir、mkdir 、rmdir、getcwd、chdir详细含义用法及介绍(基础)
  16. 课5 视频分镜的处理
  17. 动态内表的俩栗子_SAP刘梦_新浪博客
  18. 托福百日冲刺(五一记忆)(1)
  19. 【CAD】通过VBA获取CAD中的文本
  20. CSGO服务器租用如何选择合适的配置?CSGO服务器怎么选择?

热门文章

  1. Mac 如何在终端玩游戏
  2. iOS应用中增加emoji表情输入功能
  3. cisco 无线ap ME和LAP模式切换
  4. LSDyna在土木工程静力问题中的应用
  5. 微信投票微信刷票的技巧和意义
  6. Linux的实时监测命令(watch)
  7. CSS3 动画专栏:@keyframes与animation的恋曲
  8. 算法篇:神奇的卡塔兰数Catalan
  9. HDU 4417 Super Mario(划分树问题求不大于k的数有多少)
  10. JS统计字符串中汉字的个数