Seq2Seq识别车牌项目demo
一、Seq2Seq相关概念
1、Seq2Seq的作用:
Seq2Seq 是一类特殊的 RNN 结构,在机器翻译、文本自动摘要和语音识别中有着成功的应用。它可以用来解决输入和输出序列不等长的问题,典型的例子是语言互译:输入句子和输出句子的长度通常不相等。
2、Seq2Seq的结构
分为两部分:编码器和解码器
其中:编码器和解码器分别是两个RNN网络,编码器用来分析输入序列,解码器用来生成输出序列。
此处可以使用的 RNN 变体:
- RNN 可以是单向的或双向的,后者将捕捉双向的长时间依赖关系。
- RNN 可以有多个隐藏层,层数的选择对于优化来说至关重要:更深的网络可以学到更多知识,但另一方面,训练需要花费更长的时间,而且可能会过拟合。
- RNN 可以具有嵌入层,其将单词映射到嵌入空间中,在嵌入空间中相似单词的映射恰好也非常接近。
- RNN 可以使用简单的重复性单元、LSTM、窥孔 LSTM 或者 GRU。
二、识别车牌
1、制作车牌数据集
注意:
- 模拟车牌的图片名由7个以逗号分隔的数字组成,分别是车牌中7个字符在字典中对应的索引,用作网络学习的标签
- 为了增加模拟车牌的多样性,可以在生成的模拟车牌中添加噪声、仿射变换等操作
import numpy as np
import cv2
from PIL import ImageFont, ImageDraw, Image
import os

# Character sets used on a plate.
# Province abbreviations (only valid in position 0).
abbr_chars = ["京", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "皖",
              "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云",
              "藏", "陕", "甘", "青", "宁", "新"]
# Letters (I and O are not part of the set).
letter_chars = ["A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N",
                "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
num_chars = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

# Index -> character table: abbreviations (0-30), digits (31-40), letters (41-64).
chars_list = abbr_chars + num_chars + letter_chars
# Character -> index table, derived from chars_list so the two can never disagree.
char_index_map = {ch: idx for idx, ch in enumerate(chars_list)}

# Font for the first (Chinese) character and a slightly larger one for the other six.
font_Image = ImageFont.truetype("./simhei.ttf", 66, encoding="utf-8")
font_Image2 = ImageFont.truetype("./simhei.ttf", 72, encoding="utf-8")

# Top-left drawing position of each of the 7 characters on the 420x150 plate.
zoords = [(15, 30), (78, 30), (162, 30), (215, 30), (265, 30), (315, 30), (365, 30)]

plate_type = ["green", "blue", "yellow"]
# Text colour used on each background colour.
plate_colors = {"blue": (255, 255, 255), "green": (0, 0, 0), "yellow": (0, 0, 0)}

# Single-channel uint8 canvas that is expanded to 3 channels for every background.
# (np.random.random() was not usable here: it takes no dtype and returns floats,
# which cv2.cvtColor cannot convert.)
img = np.ones((150, 420), dtype=np.uint8)

y_bg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # yellow plate background (BGR)
y_bg[:, :, 0] = 0
y_bg[:, :, 1] = 255
y_bg[:, :, 2] = 255

g_bg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # green plate background (BGR)
g_bg[:, :, 0] = 0
g_bg[:, :, 1] = 255
g_bg[:, :, 2] = 0

b_bg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # blue plate background (BGR)
b_bg[:, :, 0] = 255
b_bg[:, :, 1] = 0
b_bg[:, :, 2] = 0

bg_dic = {"yellow": y_bg, "blue": b_bg, "green": g_bg}


def getPlate(bg_path_root="", plate_type="blue", isPosRandom=False, isGauss=False, first_char_index=-1):
    """Render one synthetic 7-character plate.

    Args:
        bg_path_root: Directory where the solid background PNG is written and
            read back; an empty string means "./plate". Created if missing.
        plate_type: Key into ``bg_dic`` / ``plate_colors``
            ("blue", "green" or "yellow"). Note that this parameter shadows the
            module-level ``plate_type`` list inside the function.
        isPosRandom: When truthy, each character position is shifted by a
            random offset in [-5, 4] on both axes.
        isGauss: When truthy, the finished plate is blurred with a 9x9
            Gaussian kernel.
        first_char_index: When >= 0, the first character is
            ``abbr_chars[first_char_index % len(abbr_chars)]`` instead of a
            random abbreviation, so a caller can cycle through every
            abbreviation.

    Returns:
        ``(image, indices)`` where ``image`` is a BGR array and ``indices`` is
        the list of 7 class indices from ``char_index_map``; ``(None, None)``
        if rendering raised an exception (the error is printed).
    """
    color = plate_colors[plate_type]
    if bg_path_root == "":
        bg_path_root = "./plate"
    # The directory has to exist before the background can be saved; without
    # this the first run could not write the PNG and rendering failed.
    os.makedirs(bg_path_root, exist_ok=True)
    bg_path = (bg_path_root + "/{}.png").format(plate_type)
    cv2.imwrite(bg_path, np.uint8(bg_dic[plate_type]))
    img_bg = cv2.imread(bg_path, 1)  # flag 1: read as a 3-channel colour image
    chars_index_arr = []
    try:
        # PIL is used for drawing because it can render the Chinese glyphs.
        image_pil = Image.fromarray(cv2.cvtColor(img_bg, cv2.COLOR_BGR2RGB))
        imDraw = ImageDraw.Draw(image_pil)
        for i in range(7):
            if first_char_index >= 0 and i == 0:
                first_char_index = first_char_index % len(abbr_chars)
                char = abbr_chars[first_char_index]
            else:
                char = getCharByIndex(i)
            # Offsets are always drawn so the random stream does not depend on
            # isPosRandom; they are only applied when it is set.
            offset_x = np.random.randint(-5, 5)
            offset_y = np.random.randint(-5, 5)
            zoord = zoords[i]
            if isPosRandom:
                zoord = (zoords[i][0] + offset_x, zoords[i][1] + offset_y)
            chars_index_arr.append(char_index_map[char])
            if i == 0:
                imDraw.text(zoord, text=char, font=font_Image, fill=color,
                            stroke_width=1, stroke_fill=color)
            else:
                imDraw.text(zoord, text=char, font=font_Image2, fill=color,
                            stroke_width=1, stroke_fill=color, align="center")
        img_cv2 = cv2.cvtColor(np.asarray(image_pil), cv2.COLOR_RGB2BGR)
        if isGauss:
            img_cv2 = cv2.GaussianBlur(img_cv2, (9, 9), 0)
        return img_cv2, chars_index_arr
    except Exception as e:
        # Best effort: report the problem and let the caller skip this sample.
        print(e)
    return None, None


def getCharByIndex(index):
    """Return a random character that is valid at plate position ``index``.

    Position 0 yields a province abbreviation, position 1 a letter, and every
    other position a letter or a digit.
    """
    if index == 0:
        return abbr_chars[np.random.randint(0, len(abbr_chars))]
    if index == 1:
        return letter_chars[np.random.randint(0, len(letter_chars))]
    new_chars = letter_chars + num_chars
    return new_chars[np.random.randint(0, len(new_chars))]


def getCharsByIndexs(index_list):
    """Convert a sequence of class indices back into the plate string."""
    return "".join(chars_list[v] for v in index_list)


if __name__ == '__main__':
    # Quick visual check of a single plate:
    # plate1, charIndexs1 = getPlate("", "yellow", True, True, 13)
    # print(charIndexs1, getCharsByIndexs(charIndexs1))
    # cv2.imshow("1", plate1)
    # cv2.waitKey(0)

    image_path = r"./plate"
    # Background PNGs go to their own directory: every file under ./plate is
    # treated as a labelled sample by the sampling script.
    bg_path = "./plate_bg"
    os.makedirs(image_path, exist_ok=True)

    for _plate_type in plate_type:  # one pass per background colour
        for i in range(1000):  # number of plates generated per colour
            first_char_index = i  # cycles through all abbreviations via the modulo in getPlate
            isPosRandom = np.random.randint(0, 2)  # randomly jitter character positions
            isGauss = np.random.randint(0, 2)  # randomly blur the plate
            # The blur flag used to be passed as isPosRandom a second time,
            # which left isGauss unused.
            image, index_list = getPlate(bg_path, _plate_type, isPosRandom, isGauss, first_char_index)
            if image is None:
                # getPlate already printed the error; skip this sample.
                continue
            # The file name is the 7 class indices joined by commas (the label).
            index_chars = ",".join(str(v) for v in index_list)
            cv2.imwrite("{0}/{1}.jpg".format(image_path, index_chars), image)
样本示例如下:
2、搭建网络训练
(1)采样文件
注意:此处先将图片二值化为黑白图像,这样网络学习起来更容易
import os
import torch
import numpy as np
from PIL import Image
import torch.utils.data as data
from torchvision import transforms
import cv2
from sklearn.preprocessing import OneHotEncoder  # not used below; kept from the original import list

# Convert a PIL image to a tensor, then normalise each channel with mean 0.5 / std 0.5.
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


class Sampling(data.Dataset):
    """Dataset of plate images whose file names carry the labels.

    A sample file is named ``i0,i1,...,i6.<ext>`` where each ``i`` is a class
    index in ``[0, 65)``. Files that do not follow this pattern are ignored.
    """

    PLATE_LEN = 7      # characters per plate
    NUM_CLASSES = 65   # size of the character table

    def __init__(self, root):
        """Collect image paths and labels from every valid file in ``root``."""
        self.transform = data_transforms
        self.imgs = []
        self.labels = []
        for filenames in os.listdir(root):
            # The part of the file name before the first dot holds the
            # comma-separated class indices.
            y = filenames.split('.')[0].split(",")
            if not self._is_valid_label(y):
                # e.g. a stray "blue.png" background in the same directory
                # would otherwise make one_hot() raise on int("blue").
                continue
            self.imgs.append(os.path.join(root, filenames))
            self.labels.append(y)

    @classmethod
    def _is_valid_label(cls, fields):
        """Return True when ``fields`` is PLATE_LEN decimal indices below NUM_CLASSES."""
        if len(fields) != cls.PLATE_LEN:
            return False
        return all(f.isdecimal() and int(f) < cls.NUM_CLASSES for f in fields)

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(image_tensor, one_hot_label)`` for sample ``index``."""
        img_path = self.imgs[index]
        img = cv2.imread(img_path, 1)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Otsu picks the threshold automatically; the result is a black/white image.
        ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # Back to 3 channels because the transform normalises 3 channels.
        img = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
        img = Image.fromarray(img)
        img = self.transform(img)
        label = self.one_hot(self.labels[index])
        return img, label

    def one_hot(self, x):
        """Encode 7 class indices (strings or ints) as a (7, 65) one-hot array."""
        z = np.zeros(shape=[7, 65])
        for i in range(7):
            z[i][int(x[i])] = 1
        return z


if __name__ == '__main__':
    samping = Sampling("./plate")
    dataloader = data.DataLoader(samping, 1, shuffle=True)
    for i, (img, label) in enumerate(dataloader):
        print(img.shape)    # torch.Size([1, 3, 150, 420]) with batch size 1
        print(label.shape)  # torch.Size([1, 7, 65])
(2)训练网络
注意:主网络分为编码器和解码器两部分,二者均使用了LSTM
import os
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
import myseq_sampling


class Encoder(nn.Module):
    """Reads the plate image column by column and summarises it as one 512-d vector."""

    def __init__(self):
        super(Encoder, self).__init__()
        # Projects each image column (3 channels x 150 rows = 450 values) to 512 features.
        self.fc1 = nn.Sequential(
            nn.Linear(450, 512),
            nn.BatchNorm1d(num_features=512),
            nn.ReLU(),
        )
        self.lstm = nn.LSTM(input_size=512, hidden_size=512, num_layers=1, batch_first=True)

    def forward(self, x):
        # (N, 3, 150, 420) -> (N, 450, 420) -> (N, 420, 450): one step per image column.
        x = x.reshape(-1, 450, 420).permute(0, 2, 1)
        x = x.reshape(-1, 450)  # (N*420, 450) so Linear/BatchNorm1d see 2-D input
        fc1 = self.fc1(x)
        fc1 = fc1.reshape(-1, 420, 512)
        lstm, (h_n, h_c) = self.lstm(fc1, None)
        out = lstm[:, -1, :]  # output of the last time step, (N, 512)
        return out


class Decoder(nn.Module):
    """Expands the encoder vector into 7 steps and scores 65 classes per step."""

    def __init__(self):
        super(Decoder, self).__init__()
        self.lstm = nn.LSTM(input_size=512, hidden_size=512, num_layers=2, batch_first=True)
        self.out = nn.Linear(512, 65)

    def forward(self, x):
        x = x.reshape(-1, 1, 512)
        x = x.expand(-1, 7, 512)  # the same vector is fed at each of the 7 steps
        lstm, (h_n, h_c) = self.lstm(x, None)
        y1 = lstm.reshape(-1, 512)
        out = self.out(y1)
        output = out.reshape(-1, 7, 65)
        return output


class MainNet(nn.Module):
    """Encoder followed by decoder: image batch in, (N, 7, 65) scores out."""

    def __init__(self):
        super(MainNet, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        encoder = self.encoder(x)
        decoder = self.decoder(encoder)
        return decoder


if __name__ == '__main__':
    BATCH = 64
    EPOCH = 100000
    save_path = './my_param/seq2seq.pth'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = MainNet().to(device)

    opt = torch.optim.Adam(net.parameters())
    # The labels are one-hot arrays, so the outputs are regressed onto them with MSE.
    loss_func = nn.MSELoss()

    # torch.save needs the target directory to exist.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    if os.path.exists(save_path):
        # map_location lets a checkpoint written on a GPU load on a CPU-only host.
        net.load_state_dict(torch.load(save_path, map_location=device))
    else:
        print("No Params!")

    train_data = myseq_sampling.Sampling(root="./plate")
    train_loader = data.DataLoader(dataset=train_data, batch_size=BATCH, shuffle=True, num_workers=4)

    for epoch in range(EPOCH):
        for i, (x, y) in enumerate(train_loader):
            batch_x = x.to(device)
            batch_y = y.float().to(device)

            output = net(batch_x)  # (N, 7, 65), same shape as batch_y
            loss = loss_func(output, batch_y)

            if i % 10 == 0:
                print(loss)

            opt.zero_grad()
            loss.backward()
            opt.step()

            if i % 100 == 0:
                label_y = torch.argmax(y, 2).detach().numpy()
                out_y = torch.argmax(output, 2).cpu().detach().numpy()
                # Divide by the real number of characters in this batch; a batch
                # can hold fewer than BATCH samples.
                accuracy = np.sum(out_y == label_y, dtype=np.float32) / label_y.size
                print("epoch:{},i:{},loss:{:.6f},acc:{:.2f}%".format(epoch, i, loss.item(), accuracy * 100))
                print("label_y:", label_y[0])
                print("out_y:", out_y[0])
                torch.save(net.state_dict(), save_path)
样本比较少时可以训练出过拟合版本:
参考资料:
http://c.biancheng.net/view/1947.html
http://zh.gluon.ai/chapter_natural-language-processing/seq2seq.html
https://blog.csdn.net/Jerr__y/article/details/53749693
Seq2Seq识别车牌项目demo相关推荐
- 识别车牌号码demo php,Android车牌识别 Demo 源码 能识别蓝色和黄色车牌的OCR android版本...
[实例简介] 文字描述:一,开发环境 jse eclipse(Kepler,32bit),jdk 1.6.0 _45(32bit),adt 23.0.6,支持Android SDK版本区间为8~19( ...
- vs添加系统环境变量不识别_项目经验不重样!3个基于SpringBoot 的图片识别处理系统送给你...
转载:https://mp.weixin.qq.com/s/WDMyIfOi2ogw0mKl3XxQdQ 最近看了太多读者小伙伴的简历,发现各种商城/秒杀系统/在线教育系统真的是挺多的.推荐一下昨晚找 ...
- bp神经网络实现人脸识别,车牌识别深度神经网络
1.您好,请问您有基于BP神经网络算法的车牌识别的程序代码吗?用matlab可以运行的那种. 1.对样本集进行归一化 2.创建BP神经网络 3.设置网络的训练参数 4.把样本输入BP网络进行训练 5. ...
- 识别车牌是什么神经网络,车牌识别深度神经网络
1.急求用BP神经网络实现车牌识别的MATLAB程序代码 车牌识别技术(Vehicle License Plate Recognition,VLPR) 是计算机视频图像识别技术在车辆牌照识别中的一种应 ...
- 真香!用Python检测和识别车牌(附代码)
车牌检测与识别技术用途广泛,可以用于道路系统.无票停车场.车辆门禁等.这项技术结合了计算机视觉和人工智能. 本文将使用Python创建一个车牌检测和识别程序.该程序对输入图像进行处理,检测和识别车牌, ...
- 如何使用 Python 检测和识别车牌(附 Python 代码)
文章目录 创建Python环境 如何在您的计算机上安装Tesseract OCR? 技术提升 磨砺您的Python技能 车牌检测与识别技术用途广泛,可以用于道路系统.无票停车场.车辆门禁等.这项技术结 ...
- 图像解析——(java)识别车牌步骤——更新中
## 车牌识别过程说明文档整体流程主要分两个大的步骤: - 1.使用多种算法,提取到车牌的轮廓,按轮廓从原图片获取车牌的切图,使用SVM算法模型,判断该切图是否是车牌 - 2.根据车牌切图,判定车牌颜 ...
- OpenCV进阶之路:神经网络识别车牌字符
1. 关于OpenCV进阶之路 前段时间写过一些关于OpenCV基础知识方面的系列文章,主要内容是面向OpenCV初学者,介绍OpenCV中一些常用的函数的接口和调用方法,相关的内容在OpenCV的手 ...
- .Net 调用阿里云接口-识别车牌
参考文档:生成URL - 阿里云视觉智能开放平台 - 阿里云 参考文档:https://next.api.aliyun.com/api/ocr/2019-12-30/RecognizeLicenseP ...
最新文章
- df满足条件的值修改_文科生学 Python 系列 16:泰坦尼克数据 2(缺失值处理)
- 用ggplot包画一个简单饼图
- gson 自定义对象转换格式
- c mysql 编译_MySQL编译安装之cmake
- Android Studio 快捷键大全(Mac系统)
- erp软件是什么软件
- 大话设计模式读书笔记10----外观模式(Facade)
- Redis3集群搭建
- 玩转大数据系列之二:数据分析与处理
- mysql url 解析json数据_如何在MySQL中编写可以解析列中JSON数据的查...
- ubuntu MeshLab安装
- Java大数开根号板子
- ffmpeg快速将mkv转mp4
- Nginx 局域网共享
- 进程proc文件介绍
- “圆周率的计算”实例详解
- 一道逻辑题 房间里有100盏电灯
- 守望者的逃离 动态规划+滚动数组
- IE浏览器代理服务器总是自动打开,取消勾选隔一会又会打开
- Sql 日月年 日期格式转 月日年 日期格式