原由

最近在处理一批人脸识别数据，但数据量不够，需要从网上下载补充。按关键词爬取的话，什么乱七八糟的图片都会混进来，后期还得重新筛选，所以这里写了个自动识别、自动下载的脚本：添加关键词后运行，就可以让它慢慢跑了。

安装insightface

安装教程：insightface

演示

结果

大概流程

下载并安装好 insightface（完整文件见链接），然后根据 readme 文件修改源码，这样就不用再去下载它的模型文件。
编辑“关键词.txt”，每行一个人物名；把人物图片添加到 face_db 文件夹，图片文件名要与需要下载的人物名一致。（如果没有 face_db 文件夹，就自己新建一个。）
点击运行就ok啦，想下几张图就几张图。

下载图片源码

# -*- coding:utf-8 -*-
#By：Cxk
import requests,os #首先导入库
import  re,time
# NOTE(review): the crawler below uses cv2 and np without importing them, so
# they presumably arrive through this wildcard import - confirm before
# narrowing it.
from insightface_test import *  # noqa: F401,F403

MaxSearchPage = 20  # default number of result pages to search
CurrentPage = 0  # page currently being searched
count_img = 0  # images accepted so far for the current keyword

# Shared recognizer, configured from config.yml.
face_recognitio = FaceRecognition("config.yml")
#图片链接正则和下一页的链接正则
def imageFiler(content):
    """Return the image URLs embedded in one search-result page.

    Args:
        content: Decoded text of a result page.

    Returns:
        A list holding the value of every ``"objURL":"..."`` entry, in the
        order they appear; an empty list when the page has none.
    """
    # re.S lets "." cross line breaks, so a value split over lines still matches.
    return re.findall('"objURL":"(.*?)"', content, re.S)
def nextSource(content):
    """Return the link to the next result page, or None when there is none.

    Args:
        content: Decoded text of a result page.

    Returns:
        The ``href`` value of the first ``class="n"`` anchor matched after
        ``<div id="page">``, or ``None`` if the page has no such anchor
        (e.g. the last page of results).
    """
    # re.search yields the same first match as findall(...)[0], but reports a
    # missing link as None instead of raising IndexError.
    match = re.search('<div id="page">.*<a href="(.*?)" class="n">', content, re.S)
    return match.group(1) if match else None
#爬虫主体
def spidler(source, all_img):
    """Crawl result pages from *source*, keeping only recognized faces.

    Every image URL on a page is downloaded into ``folder_path`` and passed
    to ``face_recognitio.recognition``. A truthy result counts the image as
    accepted; otherwise the downloaded file is deleted. Crawling follows the
    next-page link until ``all_img`` images are accepted, ``CurrentPage``
    exceeds ``MaxSearchPage``, or no next page exists.

    Args:
        source: URL of the first result page to crawl.
        all_img: Number of accepted images at which crawling stops.
    """
    global folder_path, count_img, MaxSearchPage, CurrentPage
    s = requests.session()
    s.headers['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'

    # Pages are walked in a loop rather than by recursion, so one session is
    # reused and the call stack does not grow with the page count.
    while source:
        content = s.get(source).content.decode('utf-8')

        for imageUrl in imageFiler(content):
            # Path the downloaded image is written to.
            pictureSavePath = folder_path + str(count_img) + '.jpg'
            try:
                picture = requests.get(imageUrl, timeout=5)
                picture.raise_for_status()
                with open(pictureSavePath, 'wb') as f:
                    f.write(picture.content)
                # np.fromfile + imdecode is used instead of cv2.imread.
                img = cv2.imdecode(np.fromfile(pictureSavePath, dtype=np.uint8), -1)
                # Face recognition decides whether the image is kept.
                flag = face_recognitio.recognition(img, pictureSavePath, folder_path)
            except Exception as exc:
                # Exception (not a bare except) keeps Ctrl-C working; a bad
                # image is skipped and its partial file removed.
                print('下载或识别失败，已跳过：%s (%s)' % (imageUrl, exc))
                if os.path.exists(pictureSavePath):
                    os.remove(pictureSavePath)
                continue

            if flag:
                count_img += 1
            elif os.path.exists(pictureSavePath):
                os.remove(pictureSavePath)
            time.sleep(0.3)
            print('**********************%s.jpg完成****************' % count_img)
            if count_img >= all_img:
                break

        if CurrentPage > MaxSearchPage or count_img >= all_img:
            break

        # Look the link up once; tolerate an IndexError from the lookup when
        # the page carries no next-page anchor.
        try:
            next_link = nextSource(content)
        except IndexError:
            next_link = None
        if not next_link:
            break

        # Continue with the next page.
        CurrentPage += 1
        source = "http://image.baidu.com" + next_link
# Entry point that starts the crawl
def beginSearch(key, all_img):
    """Start a crawl for one keyword and save accepted images under ./image/.

    Args:
        key: Search keyword (a person's name).
        all_img: Number of accepted images to collect for this keyword.
    """
    global folder_path, CurrentPage
    from urllib.parse import quote

    # Directory the images are saved to.
    folder_path = './image/'
    os.makedirs(folder_path, exist_ok=True)

    # Reset the page counter so each keyword gets the full page budget
    # instead of inheriting the pages used up by earlier keywords.
    CurrentPage = 0

    # The keyword is substituted into the `word` query parameter; it is
    # percent-encoded so spaces, "&" and similar cannot corrupt the query.
    StartSource = "http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=" + quote(str(key)) + "&ct=201326592&v=flip"
    spidler(StartSource, all_img)
    print('**********************全部完成****************')


def signin():
    """Register every image found under ``face_db`` with the recognizer.

    Images that cannot be read or registered are reported and skipped.
    """
    _, image_paths, _ = GetImgNameByEveryDir('face_db', '.jpg/.png/.jpeg')
    for image_path in image_paths:
        try:
            img = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), -1)
            # Register the face under the name taken from the file path.
            face_recognitio.register(img, image_path)
        except Exception as exc:
            # Report instead of silently swallowing; Exception (not a bare
            # except) lets KeyboardInterrupt through.
            print('注册失败，已跳过：%s (%s)' % (image_path, exc))
            continue


if __name__ == "__main__":
    # Register the reference faces first.
    signin()
    all_img = 5  # number of images wanted per keyword
    # The with-block closes the keyword file once all keywords are processed.
    with open("关键词.txt", "r", encoding='utf-8') as f:
        for line in f:
            keyword = line.strip()
            if not keyword:
                # A blank line would otherwise trigger an empty search.
                continue
            count_img = 0
            beginSearch(keyword, all_img)

人脸识别源码

import os
import yaml
import numpy as np
import insightface
import cv2,shutil
# from utils import log_util
# from utils.utils import get_user_id
from sklearn import preprocessing
# https://blog.doiduoyi.com/articles/1598762632278.html#%E4%BA%BA%E8%84%B8%E8%AF%86%E5%88%AB%E5%92%8C%E4%BA%BA%E8%84%B8%E6%B3%A8%E5%86%8C


class DeployConfig:
    """Deploy configuration file parser.

    Reads the ``FACE`` section of a YAML file and exposes its ``GPU_ID``,
    ``FACE_DB``, ``THRESHOLD`` and ``NMS`` entries as attributes.
    """

    def __init__(self, conf_file):
        """Load the configuration.

        Args:
            conf_file: Path to the YAML configuration file.

        Raises:
            Exception: If ``conf_file`` does not exist.
            KeyError: If the ``FACE`` section or one of its keys is missing.
        """
        if not os.path.exists(conf_file):
            raise Exception('Config file path [%s] invalid!' % conf_file)

        with open(conf_file) as fp:
            # NOTE(review): FullLoader can build more than plain data; if the
            # config may come from an untrusted source, use yaml.safe_load.
            configs = yaml.load(fp, Loader=yaml.FullLoader)
            deploy_conf = configs["FACE"]
            # A positive value is the GPU id; a negative value selects the CPU.
            self.gpu_id = deploy_conf["GPU_ID"]
            self.face_db = deploy_conf["FACE_DB"]
            self.threshold = deploy_conf["THRESHOLD"]
            self.nms = deploy_conf["NMS"]


class FaceRecognition:
    """Register known faces and sort images by the face they contain."""

    def __init__(self, conf_file):
        """Build the recognizer from the configuration file *conf_file*."""
        self.config = DeployConfig(conf_file)
        # Load the face recognition model.
        self.model = insightface.app.FaceAnalysis()
        self.model.prepare(ctx_id=self.config.gpu_id, nms=self.config.nms)
        # Features of the faces in the face database.
        self.faces_embedding = list()
        # Load the previously registered faces.
        self.load_faces(self.config.face_db)

    def load_faces(self, face_db_path):
        """Ensure the face directory exists and load saved features.

        Args:
            face_db_path: Directory of reference images; created if missing.
        """
        if not os.path.exists(face_db_path):
            os.makedirs(face_db_path)
        if os.path.exists('a.npy'):
            # NOTE(review): allow_pickle=True executes pickled content; only
            # load an a.npy file that this program wrote itself.
            self.faces_embedding = np.load('a.npy', allow_pickle=True)
            self.faces_embedding = self.faces_embedding.tolist()

    def recognition(self, image, img_path, dir_path):
        """Move the image into the folder of the first registered face it matches.

        Args:
            image: Decoded image passed to the model.
            img_path: Path of the image file on disk.
            dir_path: Directory under which per-person folders are created.

        Returns:
            True if a detected face matched a registered one and the file was
            moved to ``dir_path/<user_id>``; False otherwise, including when
            detection or the move fails.
        """
        if image is None:
            # Nothing to analyse (e.g. the file could not be decoded).
            return False
        try:
            faces = self.model.get(image)
            for face in faces or ():
                # Start face recognition.
                embedding = np.array(face.embedding).reshape((1, -1))
                embedding = preprocessing.normalize(embedding)
                for com_face in self.faces_embedding:
                    if not self.feature_compare(embedding, com_face["feature"], self.config.threshold):
                        continue
                    save_path = os.path.join(dir_path, com_face["user_id"])
                    os.makedirs(save_path, exist_ok=True)
                    shutil.move(img_path, save_path)
                    print("%s---分类完成！" % img_path)
                    return True
        except Exception as exc:
            # Best effort: a failed image counts as "not recognized", but the
            # reason is reported and KeyboardInterrupt is not swallowed.
            print("识别失败：%s (%s)" % (img_path, exc))
        return False

    @staticmethod
    def feature_compare(feature1, feature2, threshold):
        """Tell whether two feature arrays are closer than *threshold*.

        Static, so it can be called on the class or on an instance.

        Args:
            feature1: 2-D feature array.
            feature2: 2-D feature array to subtract from ``feature1``.
            threshold: Upper bound (exclusive) on the squared distance.

        Returns:
            True when the row-wise sum of squared differences is below
            ``threshold`` for every row, else False.
        """
        diff = np.subtract(feature1, feature2)
        dist = np.sum(np.square(diff), 1)
        return bool(np.all(dist < threshold))

    @staticmethod
    def _user_id_from_path(path):
        """Return the file name of *path* without directory or extension."""
        # Accept both "\\" and "/" so the id does not depend on the OS the
        # path was built on, and dots in directory names cannot truncate it.
        filename = path.replace('\\', '/').rsplit('/', 1)[-1]
        return os.path.splitext(filename)[0]

    def register(self, image, path):
        """Register the single face in *image* under the name taken from *path*.

        Args:
            image: Decoded image passed to the model.
            path: Path of the image file; its base name without extension
                becomes the ``user_id``.

        Returns:
            True when the face was added and saved to ``a.npy``; None when
            the image does not contain exactly one face or the face is
            already registered.
        """
        faces = self.model.get(image)
        if len(faces) != 1:
            print("没有检测到人脸，无法注册")
            return None

        # Check whether the face already exists.
        embedding = np.array(faces[0].embedding).reshape((1, -1))
        embedding = preprocessing.normalize(embedding)
        is_exits = any(
            self.feature_compare(embedding, com_face["feature"], self.config.threshold)
            for com_face in self.faces_embedding
        )
        if is_exits:
            print("人脸已存在，无法注册")
            return None

        # Eligible for registration: add the feature to the face database.
        user_id = self._user_id_from_path(path)
        self.faces_embedding.append({
            "user_id": user_id,
            "feature": embedding,
        })
        # Persist the whole database in .npy format.
        np.save('a.npy', np.array(self.faces_embedding))
        return True


def GetImgNameByEveryDir(file_dir, videoProperty):
    """Collect files under *file_dir* whose extension is in *videoProperty*.

    Args:
        file_dir: Root directory, walked recursively.
        videoProperty: Wanted extensions, either one string such as
            ``'.jpg/.png/.jpeg'`` or an iterable of extensions. Matching is
            case-insensitive and requires the whole extension to match.

    Returns:
        Three parallel lists ``(FileName, FileNameWithPath, FileDir)``: file
        names, full paths, and each file's directory relative to ``file_dir``.
    """
    if isinstance(videoProperty, str):
        # Split on the dots and trim separators, so '.jpg/.png' style strings
        # work with any common separator.
        candidates = ['.' + part.strip(' /,;|') for part in videoProperty.split('.')]
    else:
        candidates = list(videoProperty)
    # A set of exact extensions avoids the substring test that let files with
    # no extension (or a partial one such as ".jp") through.
    extensions = {ext.lower() for ext in candidates if ext and ext != '.'}

    FileNameWithPath = []
    FileName = []
    FileDir = []
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            if os.path.splitext(file)[1].lower() in extensions:
                FileNameWithPath.append(os.path.join(root, file))  # full path
                FileName.append(file)                              # file name
                FileDir.append(root[len(file_dir):])               # containing folder
    return FileName, FileNameWithPath, FileDir

使用insightface进行人脸识别批量下载图片相关推荐

人脸识别2：InsightFace实现人脸识别Face Recognition(含源码下载)
人脸识别2:InsightFace实现人脸识别Face Recognition(含源码下载) 目录人脸识别2:InsightFace实现人脸识别Face Recognition(含源码下载) 1. ...
人脸识别4：Android InsightFace实现人脸识别Face Recognition(含源码)
人脸识别4:Android InsightFace实现人脸识别Face Recognition(含源码) 目录人脸识别4:Android InsightFace实现人脸识别Face Recognit ...
人脸识别3：C/C++ InsightFace实现人脸识别Face Recognition(含源码)
人脸识别3:C/C++ InsightFace实现人脸识别Face Recognition(含源码) 目录 1. 前言 2. 项目安装 (1)项目结构 (2)配置开发环境(OpenCV+OpenCL+ ...
批量下载图片的插件-免费自动下载图片的插件
为什么需要批量下载图片的插件?现实生活中基本上每个人都会和图片照片打交道,当看到自己喜欢的图片都喜欢保存下来.而大多用户只知道通过鼠标右键的 "图片另存为" 将图片下载到本地.图片 ...
2021-03-10 Python多线程爬虫快速批量下载图片
Python多线程爬虫快速批量下载图片 1.完成这个需要导入的模块 urllib,random,queue(队列),threading,time,os,json 第三方模块的安装键盘win+R,输入 ...
python 批量下载网页图片_Python实现多线程批量下载图片
<派森>(Python)3.13 win32 英文安装版类型:编程工具大小:21M语言:英文评分:8.7 标签: 立即下载爬取图片可真的是一个可遇不可求的机会. 有需求就会动力. 目 ...
爬虫小案例：基于Bing关键词批量下载图片（第二版）
一.需求: 基于Bing网站,输入关键词,批量下载图片保存到本地二.代码展示: import requests from lxml import etree import os from multi ...
爬虫小案例：基于Bing关键词批量下载图片
一.需求: 基于Bing网站,输入关键词,批量下载图片保存到本地二.演示: 三.直接上代码 import os import urllib.request import urllib.parse f ...
img绝对路径图片显示_使用python爬虫去风景图片网站批量下载图片
使用python爬虫(requests,BeautifulSoup)去风景图片网站批量下载图片 1.写代码背景: 今天闲来无事,想弄点图片放到电脑,方便以后使用,故去百度查找一些风景图片网站,发现图片 ...
python multiprocessing 批量下载图片+tqdm
紧接着我的上一篇博客:用tqdm可视化loop过程,我将继续探索multiprocessing 批量下载图片+tqdm 首先,是安装multiprocessing模块了,注意在python3下pip ...

使用insightface进行人脸识别批量下载图片

原由

安装insightface

演示

结果

大概流程

下载图片源码

人脸识别源码

使用insightface进行人脸识别批量下载图片相关推荐

最新文章

热门文章