前言

最近做了一个身份证号码识别项目，在此分享一下。项目共有两种识别方法：其一是直接上传身份证号码的截图进行识别；其二是上传一张完整的身份证图像，先通过逐步处理定位身份证号码的位置，再进行识别。视频效果如下所示：

身份证号码识别（Opencv，Pytorch）

模型结构

使用的深度学习模型为CRNN，模型代码如下所示：

# crnn.py
import argparse, os
import torch
import torch.nn as nn


class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM followed by a linear projection applied per time step.

    Args:
        nInput_size: feature size of every input time step.
        nHidden: hidden size of each LSTM direction.
        nOut: feature size of every output time step.
    """

    def __init__(self, nInput_size, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()
        self.lstm = nn.LSTM(nInput_size, nHidden, bidirectional=True)
        # Both directions are concatenated, hence the doubled input width.
        self.linear = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        """Map a [seq, batch, nInput_size] tensor to [seq, batch, nOut]."""
        recurrent, _ = self.lstm(input)
        T, b, h = recurrent.size()
        # Fold time and batch together so the linear layer sees a 2-D matrix.
        t_rec = recurrent.view(T * b, h)
        output = self.linear(t_rec)  # [T * b, nOut]
        return output.view(T, b, -1)  # back to [seq, batch, nOut]


class CNN(nn.Module):
    """Depthwise-separable convolutional feature extractor used by CRNN.

    Every stage is a depthwise 3x3 (or 2x2 for the last stage) convolution
    followed by a pointwise 1x1 convolution. Attribute names are kept stable
    because they are the keys of saved checkpoints.

    Args:
        imageHeight: height of the input images; must be a multiple of 32.
        nChannel: number of input image channels.
    """

    def __init__(self, imageHeight, nChannel):
        super(CNN, self).__init__()
        assert imageHeight % 32 == 0, 'image Height has to be a multiple of 32'

        # Stage 0: nChannel -> 64, halves height and width.
        self.depth_conv0 = nn.Conv2d(in_channels=nChannel, out_channels=nChannel, kernel_size=3, stride=1,
                                     padding=1, groups=nChannel)
        self.point_conv0 = nn.Conv2d(in_channels=nChannel, out_channels=64, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.relu0 = nn.ReLU(inplace=True)
        self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 1: 64 -> 128, halves height and width.
        self.depth_conv1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1,
                                     groups=64)
        self.point_conv1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 128 -> 256 with batch normalisation, no pooling.
        self.depth_conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1,
                                     groups=128)
        self.point_conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.batchNorm2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU(inplace=True)

        # Stage 3: 256 -> 256; the pool halves the height only (stride 1 on width).
        self.depth_conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1,
                                     groups=256)
        self.point_conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 4: 256 -> 512 with batch normalisation, no pooling.
        self.depth_conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1,
                                     groups=256)
        self.point_conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.batchNorm4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU(inplace=True)

        # Stage 5: 512 -> 512; the pool halves the height only (stride 1 on width).
        self.depth_conv5 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1,
                                     groups=512)
        self.point_conv5 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))

        # Stage 6: unpadded 2x2 depthwise convolution, then batch normalisation.
        self.depth_conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=2, stride=1, padding=0,
                                     groups=512)
        self.point_conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1, padding=0,
                                     groups=1)
        self.batchNorm6 = nn.BatchNorm2d(512)
        self.relu6 = nn.ReLU(inplace=True)

    def forward(self, input):
        """Run the image batch (B, C, H, W) through all seven stages."""
        x = self.pool0(self.relu0(self.point_conv0(self.depth_conv0(input))))
        x = self.pool1(self.relu1(self.point_conv1(self.depth_conv1(x))))
        x = self.relu2(self.batchNorm2(self.point_conv2(self.depth_conv2(x))))
        x = self.pool3(self.relu3(self.point_conv3(self.depth_conv3(x))))
        x = self.relu4(self.batchNorm4(self.point_conv4(self.depth_conv4(x))))
        x = self.pool5(self.relu5(self.point_conv5(self.depth_conv5(x))))
        x = self.relu6(self.batchNorm6(self.point_conv6(self.depth_conv6(x))))
        return x


class CRNN(nn.Module):
    """CNN feature extractor followed by two stacked bidirectional LSTMs.

    Args:
        imgHeight: height of the input images (forwarded to ``CNN``).
        nChannel: number of input image channels.
        nClass: number of output classes per time step.
        nHidden: hidden size of the LSTM layers.
    """

    def __init__(self, imgHeight, nChannel, nClass, nHidden):
        super(CRNN, self).__init__()
        # Wrapped in nn.Sequential so parameter names stay "cnn.0.*" / "lstm.N.*".
        self.cnn = nn.Sequential(CNN(imgHeight, nChannel))
        self.lstm = nn.Sequential(
            BidirectionalLSTM(512, nHidden, nHidden),
            BidirectionalLSTM(nHidden, nHidden, nClass),
        )

    def forward(self, input):
        """Return per-time-step class scores shaped [seq, batch, nClass]."""
        conv = self.cnn(input)
        # PyTorch convolution output layout is (B, C, H, W).
        batch, channel, height, width = conv.size()
        assert height == 1, "the output height must be 1."
        # Drop the height axis: (B, C, 1, W) -> (B, C, W).
        conv = conv.squeeze(dim=2)
        # (B, C, W) -> (W, B, C) to match the LSTM input (seq, batch, input_size).
        conv = conv.permute(2, 0, 1)
        return self.lstm(conv)


if __name__ == "__main__":
    x = torch.rand(1, 1, 32, 300)
    model = CRNN(imgHeight=32, nChannel=1, nClass=12, nHidden=256)
    y = model(x)
    print(y.shape)

数据集

使用的数据集为自制数据集，训练集有2万张图像，测试集有2000张图像。数据集中共包含11个字符：数字0到9以及字符X（模型输出为12类，多出的一类是CTC的空白符）。部分图像如下所示：

模型训练

from model import CRNN
from mydataset import CRNNDataSet
from torch.utils.data import DataLoader
import torch
from torch import optim
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import os
import random


def decode(preds):
    """Greedy CTC decoding of one sequence of class indices.

    Blank entries are dropped and runs of the same index are collapsed to a
    single character; a blank between two equal indices keeps both.

    Args:
        preds: iterable of integer class indices, one per time step.

    Returns:
        The decoded text as a ``str`` (empty when nothing but blanks is seen).
    """
    char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'X'] + [" "]
    # The blank is the last entry of the alphabet; deriving it here keeps the
    # function independent of module-level configuration.
    blank = len(char_set) - 1

    chars = []
    previous = blank  # makes the first non-blank index always count as new
    for index in preds:
        if index != blank and index != previous:
            chars.append(char_set[index])
        previous = index
    return "".join(chars)
def getAcc(preds, labs):
    """Return the fraction of samples whose decoded text equals the label.

    Args:
        preds: tensor of per-time-step class scores; it is arg-maxed over the
            last axis and then transposed, so it is expected to be laid out as
            [seq, batch, classes].
        labs: tensor of label indices with one row per sample; entries equal
            to -1 are ignored.

    Returns:
        Exact-match accuracy over the rows of ``labs`` as a float, or 0.0 for
        an empty batch.
    """
    char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'X'] + [" "]
    labs = labs.cpu().detach().numpy()
    preds = preds.cpu().detach().numpy()

    # Best class per time step, then one row per sample: [seq, batch] -> [batch, seq].
    best = np.transpose(np.argmax(preds, axis=-1), (1, 0))
    predicted = [decode(row) for row in best]
    expected = ["".join(char_set[i] for i in lab[lab != -1]) for lab in labs]

    # Normalise by the samples actually received instead of a module-level
    # batch size, so the result stays correct for a short final batch.
    total = len(expected)
    if total == 0:
        return 0.0
    correct = sum(1 for got, want in zip(predicted, expected) if got == want)
    return correct / total


batch_size = 64
# 11 characters ('0'-'9' and 'X') plus one extra class used as the CTC blank.
n_class = 12
data_dir = 'train'
datas = os.listdir(data_dir)

train_lines = os.listdir('train')
val_lines = os.listdir('val')

trainData = CRNNDataSet(lines=train_lines, train=True, img_width=300, data_dir='train')
trainLoader = DataLoader(dataset=trainData, batch_size=batch_size, shuffle=True, num_workers=1)

valData = CRNNDataSet(lines=val_lines, train=False, img_width=300, data_dir='val')
valLoader = DataLoader(dataset=valData, batch_size=batch_size, shuffle=False, num_workers=1)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = CRNN(imgHeight=32, nChannel=1, nClass=n_class, nHidden=256)
net = net.to(device)

# Resume from an earlier checkpoint when one exists. map_location lets a
# checkpoint saved on GPU load on a CPU-only machine; without a checkpoint the
# network simply starts from its random initialisation.
checkpoint_path = './model.pth'
if os.path.exists(checkpoint_path):
    stcdics = torch.load(checkpoint_path, map_location=device)
    net.load_state_dict(state_dict=stcdics)
else:
    stcdics = None
    print('checkpoint', checkpoint_path, 'not found, training from scratch')

# blank is the index of the CTC blank class: the last one, i.e. n_class - 1.
loss_func = torch.nn.CTCLoss(blank=n_class - 1)
optimizer = torch.optim.Adam(net.parameters(), lr=0.0005, betas=(0.5, 0.999))
# Learning-rate decay: multiply by 0.99 on every scheduler step.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)

# Per-epoch history used for the loss / accuracy plots.
trainLoss = []
valLoss = []
trainAcc = []
valAcc = []
def _ctc_loss_and_acc(features, label):
    """Run one batch through ``net`` and return ``(loss, accuracy)``.

    Args:
        features: image batch as yielded by the data loader.
        label: label tensor with one row per sample; entries equal to -1 are
            excluded from the CTC targets.

    Returns:
        The CTC loss tensor and the exact-match accuracy of the batch.
    """
    # CTCLoss takes all targets concatenated into one 1-D vector.
    targets = label[label != -1].to(device)
    out = net(features.to(device))
    log_probs = out.log_softmax(2)
    # Every sample uses the full output sequence length.
    input_lengths = torch.IntTensor([out.size(0)] * int(out.size(1)))
    target_lengths = (label != -1).sum(dim=-1)
    loss = loss_func(log_probs, targets, input_lengths, target_lengths)
    return loss, getAcc(out, label)


if __name__ == '__main__':
    # Number of training epochs.
    Epoch = 50
    # One progress-bar tick per batch the loader actually yields.
    epoch_step = len(trainLoader)
    loss_func = loss_func.to(device)

    for epoch in range(1, Epoch + 1):
        net.train()
        # Plain floats: accumulating the loss tensors themselves would keep
        # every batch's autograd graph alive for the whole epoch.
        train_total_loss = 0.0
        val_total_loss = 0.0
        train_total_acc = 0
        val_total_acc = 0

        with tqdm(total=epoch_step, desc=f'Epoch{epoch}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
            for step, (features, label) in enumerate(trainLoader, 1):
                # Kept as a module global: helper code may read the size of
                # the batch currently being processed.
                batch_size = features.size()[0]
                loss, acc = _ctc_loss_and_acc(features, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_total_loss += loss.item()
                train_total_acc += acc
                pbar.set_postfix(**{'loss': train_total_loss / (step),
                                    'acc': train_total_acc / (step), })
                pbar.update(1)
        trainLoss.append(train_total_loss / step)
        trainAcc.append(train_total_acc / step)

        # Save the model after every epoch.
        torch.save(net.state_dict(), 'model.pth')

        # Validation.
        net.eval()
        with torch.no_grad():
            for step, (features, label) in enumerate(valLoader, 1):
                batch_size = features.size()[0]
                loss, acc = _ctc_loss_and_acc(features, label)
                val_total_loss += loss.item()
                val_total_acc += acc
        valLoss.append(val_total_loss / step)
        valAcc.append(val_total_acc / step)

        lr_scheduler.step()
        print('val_loss=', val_total_loss / step)
        print('val_acc=', val_total_acc / step)

    # Plot the loss and accuracy curves.
    plt.figure()
    plt.plot(trainLoss, 'r')
    plt.plot(valLoss, 'b')
    plt.title('Training and validation loss')
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend(["Loss", "Validation Loss"])
    plt.savefig('loss.png')

    plt.figure()
    plt.plot(trainAcc, 'r')
    plt.plot(valAcc, 'b')
    plt.title('Training and validation acc')
    plt.xlabel("Epochs")
    plt.ylabel("Acc")
    plt.legend(["Acc", "Validation Acc"])
    plt.savefig('acc.png')

项目结构&源码下载

项目结构图如下：

运行main.py即可弹出界面。界面如下：

项目下载：下载地址

基于深度学习的身份证号码识别（OCR，Opencv，Pytorch）相关推荐

表单识别（四）-基于深度学习的表单识别)-OCR
(论文研读后,感觉有用的一些笔记,主要是给自己记录) 论文:熊雨点,基于深度学习的表单识别系统的研究与实现基于深度学习的表单识别前言: 文档检测方法: 基于扩张卷积残差网络的表单文档定位方法: 扩 ...
一种基于深度学习的增值税发票影像识别系统
一种基于深度学习的增值税发票影像识别系统-专利技术交底书缩略语和关键术语定义 1.卷积神经网络(Convolutional Neural Networks, CNN)是一类包含卷积计算且具有深度结构 ...
蚂蚁金服张洁：基于深度学习的支付宝人脸识别技术解秘-1
蚂蚁金服张洁:基于深度学习的支付宝人脸识别技术解秘(1) 2015-08-13 10:22 于雪 51CTO 字号:T | T 用户身份认证是互联网金融发展的基石.今年三月,在德国汉诺威举办的IT展览 ...
opencv交通标志识别_教你从零开始做一个基于深度学习的交通标志识别系统
教你从零开始做一个基于深度学习的交通标志识别系统基于Yolo v3的交通标志识别系统及源码自动驾驶之--交通标志识别在本文章你可以学习到如何训练自己采集的数据集,生成模型,并用yolo v3算法 ...
DeepEye：一个基于深度学习的程序化交易识别与分类方法
DeepEye:一个基于深度学习的程序化交易识别与分类方法徐广斌,张伟上海证券交易所资本市场研究所,上海 200120 上海证券交易所产品创新中心,上海 200120 摘要:基于沪市A股交 ...
《基于深度学习的加密流量识别研究》-2022毕设笔记
参考文献: 基于深度学习的网络流量分类及异常检测方法研究_王伟基于深度学习的加密流量分类技术研究与实现_马梦叠基于深度学习的加密流量识别研究综述及展望_郭宇斌基于深度学习的加密流量算法识别研究_ ...
基于深度学习的单人步态识别系统
基于深度学习的单人步态识别系统(目前数据集大小15人,准确率100%) 一.数据预处理 a.步态轮廓图 b.头部轮廓图实现方式: 核心代码: c.骨架图二.提取步态特征 a.角度特征 b.下肢特征 ...
【手写汉字识别】基于深度学习的脱机手写汉字识别技术研究
写在前面最近一段时间在为本科毕业设计做一些知识储备,方向与手写识别的系统设计相关,在看到一篇2019年题为<基于深度学习的脱机手写汉字识别技术研究>的工学硕士论文后,感觉收获比较大,准备 ...
毕业设计之 --- 基于深度学习的行人重识别(person reid)
文章目录 0 前言 1 技术背景 2 技术介绍 3 重识别技术实现 3.1 数据集 3.2 行人检测 3.2 Person REID 3.2.1 算法原理 3.2.2 算法流程图 4 实现效果 5 部 ...

基于深度学习的身份证号码识别（OCR，Opencv，Pytorch）

文章目录

前言

模型结构

数据集

模型训练

项目结构&源码下载

基于深度学习的身份证号码识别（OCR，Opencv，Pytorch）相关推荐

最新文章

热门文章