OCR身份证信息批量识别导出excel

主要功能

对身份证照片进行 OCR 识别，提取证件上的信息；支持对整个文件夹内的照片批量识别，并将结果导出为 Excel 文档。

前期准备

程序需要调用阿里云上的身份证识别接口，须先获得 AppCode 才能运行。

# Tk string variable that holds the folder path selected by the user.
path = StringVar()
appcode = '' # Can be purchased on Aliyun: https://market.aliyun.com/products/57124001/cmapi010401.html#sku=yuncode440100000
# Address the recognition requests are posted to.
url = 'http://dm-51.data.aliyun.com/rest/160601/ocr/ocr_idcard.json'

运行所需要的库

import glob
import base64
import json
import base64
import threading
import time
from tkinter import *
from tkinter import filedialog
import tkinter
from tkinter.filedialog import askdirectory
from tkinter.messagebox import showinfo
import pandas as pd
import requests
from tkinter import ttk
import json
import base64

运行效果

完整代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import glob
import base64
import json
import base64
import threading
import time
from tkinter import *
from tkinter import filedialog
import tkinter
from tkinter.filedialog import askdirectory
from tkinter.messagebox import showinfo
import pandas as pd
import requests
from tkinter import ttk
import json
import base64
# Codec used to turn base64-encoded bytes into str.
ENCODING = 'utf-8'


# Select a folder (its path is stored in `path`).
def selectPath():
    """Ask the user for a folder and store the choice in ``path``.

    The result table and the cached results are reset first via ``init()``.
    A message box then reports whether a folder was chosen; when one was,
    the folder's entries are also printed to the console.
    """
    import os  # local import: the file's import block does not provide os

    init()
    chosen = askdirectory()  # empty when the dialog is cancelled
    if not chosen:
        # Cancelling also clears any previously selected folder.
        path.set("")
        showinfo('提示', '未选择文件夹')
        return

    # askdirectory() reports "/" separators; normpath converts them to the
    # platform's native separator instead of hard-coding "\\".
    path.set(os.path.normpath(chosen))
    showinfo('提示', '已选择'+str(path.get())+'文件夹！')
    print("路径："+str(glob.glob(os.path.join(path.get(), "*"))))


# Recognition button: start the worker threads together.
def shibie():
    """Start batch recognition for the selected folder.

    Shows a hint and does nothing when no folder has been selected.
    Otherwise the progress window (``create``) and the recognition worker
    (``tijiao``) are started in separate threads.
    """
    global statzzx

    if path.get() == '':
        showinfo('提示', '请选择文件夹！')
        return

    # Clear the completion flag before either thread starts; otherwise the
    # progress thread can observe the value 1 left over from a previous run
    # and report success before recognition has begun.
    statzzx = 0
    thread_it(create)
    thread_it(tijiao)


# Call the Aliyun API.
def tijiao():
    """Recognise every file in the selected folder and fill the table.

    Resets the table and result lists, sends each file to the OCR service
    through ``predict`` and appends successful results to ``img_file``
    (dict records used for export) and ``img_file1`` (rows for the table).
    Files that cannot be read or recognised are skipped.  When the loop is
    done ``statzzx`` is set to 1 and the rows are inserted into the table.
    """
    global statzzx
    import os  # local import: the file's import block does not provide os

    init()
    statzzx = 0
    row_id = 0  # running sequence number shown in the first column
    for file_abs in glob.glob(os.path.join(path.get(), "*")):
        try:
            img_base64data = get_img_base64(file_abs)
        except OSError as err:
            # Sub-folders and unreadable files must not abort the batch.
            print("图片错误", err)
            continue

        try:
            name, sex, nationality, birth, address, num = predict(
                url, appcode, img_base64data, configure)
        except (TypeError, KeyError, ValueError, requests.RequestException) as err:
            # predict() returns a bare int on failure, so unpacking raises
            # TypeError; the other types cover incomplete or invalid
            # responses and network errors.
            print("图片错误", err)
            continue

        if flag != 0:
            print('识别错误')
            continue

        row_id += 1
        img_file.append({'姓名': name, '性别': sex, '民族': nationality, '出生': birth, '住址': address, '身份证号码': num, "图片路径": file_abs})
        img_file1.append([row_id, name, sex, nationality, birth, address, num, file_abs])

    statzzx = 1  # tells the progress window that recognition has finished
    insert()


# Run a function in a thread.
def thread_it(func):
    """Run ``func`` in a background daemon thread.

    Args:
        func: Zero-argument callable to execute.

    Returns:
        The started ``threading.Thread``.  Callers that only want to
        launch the work may ignore it.
    """
    # daemon=True replaces the deprecated Thread.setDaemon(True); a daemon
    # thread does not block interpreter exit.
    worker = threading.Thread(target=func, daemon=True)
    worker.start()
    return worker
# Export the data to a spreadsheet.
def writeExcel():
    """Save the recognised records in ``img_file`` to an Excel workbook.

    Shows a hint when there is nothing to export.  Otherwise asks for a
    destination file; the export is skipped when that dialog is cancelled.
    """
    if not img_file:
        print("请选择文件夹！")
        showinfo('提示', '请先批量识别！')
        return

    order = ['姓名', '性别', '民族', '出生', '住址', '身份证号码', '图片路径']
    pf = pd.DataFrame(img_file)[order]

    # The default extension has to match the offered file type (.xlsx).
    file_path = filedialog.asksaveasfilename(
        defaultextension='.xlsx',
        filetypes=[("Excel files", ".xlsx")],
    )
    if not file_path:
        # Dialog cancelled: there is no path to write to.
        return

    print("文件保存路径："+str(file_path))
    # No ``encoding`` argument: pandas 2.0 removed it from to_excel().
    pf.to_excel(file_path, index=False, sheet_name="身份证信息")
    print("导出Excel成功!")
    showinfo('成功', '导出Excel成功!')
# Progress window for file recognition.
def create():
    """Show a centred progress window while recognition is running.

    The bar advances one step every 0.03 s for at most ``scale`` + 1 steps
    and the same progress is echoed to the console.  As soon as
    ``statzzx`` equals 1 a success message is shown and the loop stops.
    The window is always destroyed on exit.

    NOTE(review): the bar is time-driven, not progress-driven.  If
    recognition takes longer than the loop, the window closes without a
    success message.
    NOTE(review): this function is started through ``thread_it`` and
    touches Tk widgets from that thread - confirm this is safe on the
    target platform.
    """
    top = Toplevel()
    try:
        top.title('文件识别中...')
        pb = ttk.Progressbar(top, length=280, mode="determinate", orient=HORIZONTAL)
        win_w = 300
        win_h = 70
        left = int((screenwidth - win_w) / 2)
        upper = int((screenheight - win_h) / 2)
        top.geometry('{}x{}+{}+{}'.format(win_w, win_h, left, upper))
        pb.pack(padx=10, pady=20)
        pb["maximum"] = scale  # keep the bar's range in step with the loop
        pb["value"] = 0

        print("\n"*2)
        print("执行开始".center(scale+28, '-'))
        start = time.perf_counter()
        for step in range(scale+1):
            time.sleep(0.03)
            if statzzx == 1:
                print("文件获取成功！")
                showinfo('提示', '文件识别成功！')
                break
            pb["value"] = step  # advance by one step
            root.update()       # redraw the window
            filled = '*' * step
            remaining = '.' * (scale - step)
            percent = (step/scale)*100
            elapsed = time.perf_counter() - start
            print("\r任务进度:{:>3.0f}% [{}->{}]消耗时间:{:.2f}s".format(percent, filled, remaining, elapsed), end="")
        print("\n"+"执行结束".center(scale+28, '-'))
    finally:
        # Close the window even if something above raised.
        top.destroy()


# Insert rows into the table.
def insert():
    """Append every row of ``img_file1`` to the end of the result table."""
    for row in img_file1:
        table.insert('', END, values=row)
# Remove rows from the table.
def delete():
    """Remove every row currently held by the result table."""
    rows = table.get_children()
    if rows:
        # Treeview.delete() accepts any number of item ids.
        table.delete(*rows)
# Encode an image file.
def get_img_base64(img_file):
    """Return the content of a file as a base64 text string.

    Args:
        img_file: Path of the file to read; it is opened in binary mode.
            Inside this function the name shadows the module-level
            ``img_file`` list.

    Returns:
        The base64 encoding of the file's bytes, decoded with ``ENCODING``.
    """
    with open(img_file, 'rb') as infile:
        raw = infile.read()
    return base64.b64encode(raw).decode(ENCODING)
# Call the recognition API.
def predict(url, appcode, img_base64, kv_configure):
    """Send one base64-encoded image to the OCR service.

    Args:
        url: Address the request is posted to.
        appcode: Credential sent in the ``Authorization`` header.
        img_base64: Base64 text of the image.
        kv_configure: Optional configuration object; when not ``None`` it
            is JSON-encoded into the ``configure`` field of the request.

    Returns:
        ``(name, sex, nationality, birth, address, num)`` on success.
        On any failure the int ``1`` is returned; existing callers detect
        this through the ``TypeError`` raised when unpacking it.
    """
    failure = 1  # sentinel kept for backward compatibility with callers

    param = {'image': img_base64}
    if kv_configure is not None:
        param['configure'] = json.dumps(kv_configure)
    payload = json.dumps(param).encode("utf-8")
    headers = {'Authorization': 'APPCODE %s' % appcode}

    try:
        # A timeout keeps the worker from hanging forever on a dead link.
        response = requests.post(url=url, headers=headers, data=payload, timeout=30)
    except requests.RequestException as err:
        print(err)
        return failure

    if not response:  # a Response is falsy for 4xx/5xx status codes
        return failure

    try:
        data = response.json()
        # NOTE(review): this prints the recognised personal data to the
        # console - confirm that is acceptable.
        print(data)
        return tuple(
            data[key]
            for key in ('name', 'sex', 'nationality', 'birth', 'address', 'num')
        )
    except (KeyError, TypeError, ValueError) as err:
        # Invalid JSON or a response without the expected fields.
        print(err)
        return failure


root = Tk()
root.title("身份证信息批量获取")
path = StringVar()  # folder selected by the user
appcode = ''  # Can be purchased on Aliyun: https://market.aliyun.com/products/57124001/cmapi010401.html#sku=yuncode440100000
# NOTE(review): plain http - images and the AppCode travel unencrypted;
# check whether the service also offers an https endpoint.
url = 'http://dm-51.data.aliyun.com/rest/160601/ocr/ocr_idcard.json'
configure = {'side': 'face'}  # passed to predict() as kv_configure
flag = 0  # tijiao() only records a result while this is 0
scale = 100  # number of steps of the progress loop in create()
statzzx = 0  # set to 1 by tijiao() once recognition has finished
img_file = []  # dict records, source of the Excel export
img_file1 = []  # list rows, source of the table view

screenwidth = root.winfo_screenwidth()  # screen width
screenheight = root.winfo_screenheight()  # screen height
width = 1000
height = 500
x = int((screenwidth - width) / 2)
y = int((screenheight - height) / 2)
root.geometry('{}x{}+{}+{}'.format(width, height, x, y))  # size and position

# NOTE(review): the frame and both scrollbars are created but never laid
# out, and no scrollbar ``command`` is connected to the table.
tabel_frame = tkinter.Frame(root)
xscroll = Scrollbar(tabel_frame, orient=HORIZONTAL)
yscroll = Scrollbar(tabel_frame, orient=VERTICAL)

columns = ['id', '姓名', '性别', '民族', '出生', '住址', '身份证号码', '图片路径']
table = ttk.Treeview(
    master=root,  # parent container
    height=10,  # number of visible rows
    columns=columns,  # columns to display
    show='headings',  # hide the first (tree) column
    xscrollcommand=xscroll.set,  # horizontal scrollbar
    yscrollcommand=yscroll.set,  # vertical scrollbar
)


# Initialise the widgets.
def init():
    """Configure the result table and reset all recognition results.

    Sets the heading text and the width of every column, places the table
    in the window, removes the rows currently shown and rebinds the two
    module-level result lists to fresh empty lists.
    """
    global img_file, img_file1

    root.grid_columnconfigure(1, minsize=200)

    # Column id -> heading text.
    headings = {
        'id': '序号',
        '姓名': '姓名',
        '性别': '性别',
        '民族': '民族',
        '出生': '出生',
        '住址': '住址',
        '身份证号码': '身份证号码',
        '图片路径': '图片路径',
    }
    for column_id, title in headings.items():
        table.heading(column_id, text=title)

    # (column id, width, minimum width)
    column_sizes = (
        ('id', 10, 10),
        ('姓名', 30, 30),
        ('性别', 20, 10),
        ('民族', 20, 10),
        ('出生', 50, 50),
        ('住址', 200, 100),
        ('身份证号码', 150, 100),
        ('图片路径', 150, 100),
    )
    for column_id, col_width, col_minwidth in column_sizes:
        table.column(column_id, width=col_width, minwidth=col_minwidth, anchor=S)

    table.grid(row=3, columnspan=4, padx=18, ipadx=165, ipady=100, pady=10)
    delete()
    img_file = []
    img_file1 = []


def demo():
    """Create the toolbar widgets and initialise the table."""
    # The folder dialog is modal, so it is run directly on the main thread
    # rather than from a worker thread.
    Button(root, text="文件夹批量选择", command=selectPath, width=15).grid(row=0, column=0, padx=18, pady=10, sticky='w')
    Entry(root, textvariable=path, state="readonly", width=83).grid(row=0, column=1, pady=10, sticky='w')
    Button(root, text="批量识别", command=shibie, width=13).grid(row=0, padx=3, column=2, sticky='w', pady=10)
    Button(root, text="导出表格", command=writeExcel, width=13).grid(row=0, padx=2, column=3, sticky='w', pady=10)
    init()


if __name__ == '__main__':
    demo()
    root.mainloop()

OCR身份证信息批量识别导出excel相关推荐

如何大批量扫描的发票进行ocr识别导出Excel表格和WPS表格
OCR技术:OCR(Optical Character Recognition,光学字符识别)是将数字图像中的文字识别成字符代码的技术,在发票识别中应用广泛.通过OCR技术,可以将图片发票上的信息识别 ...
阿里云OCR身份证信息识别
阿里云OCR身份证信息识别这里使用的是base64 public JSONObject getCard(MultipartFile file) {String host = "https: ...
批量导入/导出Excel文件(Python+Matlab)
简单整理了下批量导入/导出Excel的Python和Matlab代码. Matlab 连续数字型.xlsx文件(如file_1.xlsx, file_2.xlsx, file_3.xlsx) clc, ...
又一神器，批量证件识别系统OCR,身份证识别导出EXCEL表格，居住证识别导出EXCEL表格，后期还会有社保卡识别导出EXCE表格，营业执照识别导出EXCEL表格，刚芭蕾加油！！！
老规矩上图,操作界面很简单,先看一下截面图一秒钟识别一张,直接出来就是表格数据,前几天有人跟我说需要这样的系统可以批量导入身份证.社保卡.居住证后期还有工商营业执照批量导入进去然后识别成表格导出来 ...
发票信息批量提取到 excel 软件 4.0
个人微信 394467238 4.0 更新新增了自己的OCR引擎,使用了神经网络模型,但是很可能会导致导入模块失败,并且识别的结果还有很多小问题,所以还请大家使用后反馈问题新增了用发票号码.发票代 ...
发票信息批量提取到 excel 软件 3.1
3.1 更新基础图片识别完全放开了次数限制,可以完全免费使用了,并且修改了之前的一个会导致闪退的bug 新增了高级PDF批量识别,对于扫描版本的PDF文件可以支持了 3.0 更新最大的改动为使用次 ...
发票信息批量提取到 excel 软件 3.0
3.0 更新最大的改动为使用次数可以自动获取了,通过微信扫描小程序即可获得修改了高级图片识别的一些发票项目的bug 2.5更新: 改正了很多影响体验的bug....用起来应该会好多了 2.4更新: ...
发票信息批量提取到 excel 软件 5.1
5.1更新精简入口,合并了图片和PDF的识别入口修正了一些网络不好的情况下的bug 4.0 更新新增了自己的OCR引擎,使用了神经网络模型,但是很可能会导致导入模块失败,并且识别的结果还有很多小 ...
发票信息批量提取到 excel 软件 2.3
地址更新为: 链接:https://pan.baidu.com/s/1WQQ8kaDilaagjoK5IrYZzA 提取码:1111 日了狗了,电脑还是有木马,下载链接取消了,后续我换台电脑打包软件 ...
PDF信息批量提取至Excel
在学院做助管时,某老师布置了任务:将1000+个PDF文件的指定内容整理到一个Excel中.为了防止在逐个打开复制粘贴的过程中猝死,尝试编写代码简化此问题.实现的功能主要是将每个PDF的信息分别提取到 ...

OCR身份证信息批量识别导出excel

OCR身份证信息批量识别导出excel

主要功能

前期准备

运行效果

完整代码

OCR身份证信息批量识别导出excel相关推荐

最新文章

热门文章