selenium 批量下载qq邮箱附件

# encoding:utf-8
import os
import urllib
import _thread
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys#......................................................
# Custom parameters
#......................................................
# Log in to the mailbox first, find the folder you want to process and open it
# in a new window (right click). The browser address bar (or a capture in the
# Network tab) then shows an address of this form:
#   https://mail.qq.com/cgi-bin/frame_html?t=frame_html&sid={ A }&url=/cgi-bin/mail_list?folderid={ B }%26page={ C }
# The values to customise are marked A, B and C in that address; copy them
# into `token` below.
#   A - token['sid']       Session key. It is refreshed periodically, so it has
#                          to be filled in again for every run.
#   B - token['folderid']  Folder ID.
#   C - token['page']      Page of the mail list (the first page is 0). To
#                          start from the 2nd page, set it to 1.
token = {'sid': 'f-dDI-WsroKD5XIs', 'folderid': 130, 'page': 0}
# Local directory for the downloaded attachments.
download_here = 'E:\\emal'
#......................................................
# Debug mode (0: off | 1: on)
# With debug mode on, only the listing data is printed and no attachment is
# downloaded.
#......................................................
DEBUG = 0
#......................................................
# pageInfo['now']       Page currently being read. Initialised to 0 here; the
#                       listing loop sets it to token['page'] + 1 (1-based).
# pageInfo['max']       Number of pages in the folder (read from the web page).
# pageInfo['step']      Last page that should be downloaded.
# pageInfo['isfilp']    0 until the first page turn, 1 afterwards.
# pageInfo['autofilp']  Loop flag: the listing loop runs while this is True.
#......................................................
pageInfo = {'now': 0, 'max': 0, 'step': 0, 'isfilp': 0, 'autofilp': True}
#......................................................
# title['index']  Running number of the mail being read; starts at 1 and must
#                 not be changed.
# title['start']  Only mails whose index is >= start are collected.
# title['end']    Intended index of the last mail (-1 = no limit).
#                 NOTE(review): not referenced by the visible code.
# title['step']   Stop collecting once more than `step` mails have been read
#                 (-1 = no limit).
#......................................................
title = {'index': 1, 'start': 0, 'end': -1, 'step': -1}
#......................................................
# Possible future feature (sketch only, not implemented)
#......................................................
# for name in title:
#   for key in ignore_keys:
#     if key in name:
#......................................................
# Keyword block lists (black lists)
# ignore_keys       global keywords: skip the item wherever they appear
# ignore_tile_keys  searched in the mail subject
# ignore_user_keys  searched in the sender's nickname
# ignore_file_keys  searched in the attachment file name
# NOTE(review): these lists are only declared here; the visible code does not
# consult them yet.
#......................................................
ignore_keys = []
ignore_tile_keys = []
ignore_user_keys = []
ignore_file_keys = []

# Counters used for testing, to make sure no file is missed.
test = {'fileindex': 0, 'filecount': 0, 'downloadtimes': 0}

# readmail  mail list: mail id (value), timestamp (totime), sender address (fa),
#           sender nickname (fn)
# filemail  attachment list: address, mail subject, sender nickname, file name
# foolmail  mails without attachments: sender nickname
readmail, filemail, foolmail = [], [], []
# Mailbox address
# Base address of QQ Mail, and the mail-list address of the configured folder
# and page (values taken from `token`).
url_qqmail = 'https://mail.qq.com/'
url_folder = url_qqmail + 'cgi-bin/mail_list?sid={}&folderid={}&page={}'.format(
    *(token[field] for field in ('sid', 'folderid', 'page'))
)
#......................................................
# Make sure the download directory exists
#......................................................
if not os.path.exists(download_here):
    print("文件夹不存在。正在自动创建文件夹....")
    # makedirs also creates missing parent directories, and exist_ok avoids a
    # failure if the directory appears between the check and the call.
    os.makedirs(download_here, exist_ok=True)
#......................................................
# Configure the Web Driver
#......................................................
options = webdriver.ChromeOptions()
# Chrome preferences: the image content setting (2 presumably blocks images,
# TODO confirm) and the directory downloads are saved to.
prefs = {
    "profile.managed_default_content_settings.images": 2,
    "download.default_directory": download_here,
}
options.add_experimental_option("prefs", prefs)
# Command-line arguments handed to Chrome.
for chrome_flag in (
    "user-data-dir=selenium",
    # "--window-size=1920,1080",
    "--start-maximized",
    '--ignore-certificate-errors',
):
    options.add_argument(chrome_flag)
chrome = webdriver.Chrome(r"C:\Users\Administrator\chromedriver.exe", options=options)

# Start the Web Driver and open the mailbox
print(" ")
print(" Chrome启动")
chrome.get(url_qqmail)
#chrome.implicitly_wait(3)
# Perform the mailbox login
# The login form is looked up inside the "login_frame" frame.
# switch_to.frame replaces the deprecated switch_to_frame helper.
chrome.switch_to.frame("login_frame")
chrome.find_element_by_id("uinArea").click()
email = chrome.find_element_by_name("u")
email.clear()
email.send_keys("user_name")                # user_name: mailbox account name
password = chrome.find_element_by_name("p")
password.send_keys("password")              # password: mailbox password
chrome.find_element_by_id("login_button").click()
time.sleep(1)
print("after login------------------------")
# Print the title of the current page
print(chrome.title)
# Print the URL of the current page
print(chrome.current_url)
#......................................................
# Collect page information and the list of mails
#......................................................
# NOTE(review): the original statements were collapsed onto a single line; the
# nesting below is reconstructed from the logic and should be confirmed.
while pageInfo['autofilp']:
    chrome.get(url_folder)
    element = chrome.find_element_by_id("mainFrame")
    chrome.switch_to.frame(element)

    if pageInfo['isfilp'] == 0:
        # First pass only: translate the 0-based start page into a 1-based one.
        pageInfo['now'] = token['page'] + 1

    # The page count is read from an inline script of the pager; after a page
    # turn the script index is shifted by one (hence 0 + isfilp).
    # SECURITY: eval() executes text taken from the web page. It is kept
    # because the exact script content is not visible here; replace it with
    # int() once the format is confirmed to be a plain number.
    pageInfo['max'] = eval(chrome.find_elements_by_class_name("right")[1].find_elements_by_tag_name('script')[0+pageInfo['isfilp']].get_attribute('innerHTML').strip('document.write(').strip(');'))

    # str.strip(chars) removes a *set of characters*, so it could also eat
    # characters of the folder name; remove the literal caption instead.
    folder_caption = chrome.find_element_by_xpath('//*[@id="qqmail_mailcontainer"]/div[1]').text
    print(" ---- 进入文件夹: ", folder_caption.replace('管理"我的文件夹"', '').strip())
    print("当前是第{}/{}页".format(pageInfo['now'], pageInfo['max']))

    # Page-turn rules: stay within the folder and within the requested range.
    can_filp_1 = pageInfo['now'] < pageInfo['max']
    can_filp_2 = pageInfo['now'] < pageInfo['step']

    # Read the mail list. The first "mailid" input is skipped, as in the
    # original (presumably it is not a real mail entry - TODO confirm).
    elements = chrome.find_elements_by_css_selector('input[name="mailid"]')
    for e in elements[1:]:
        try:
            if title['index'] >= title['start']:
                # No limit when step is -1, otherwise stop after `step` mails.
                check_step = title['step'] == -1 or title['index'] - title['start'] < title['step']
                if check_step:
                    sender = {
                        'timestamp': e.get_attribute('totime'),
                        'name': e.get_attribute('fn'),
                        'email': e.get_attribute('fa'),
                        'id': e.get_attribute('value'),
                        'index': title['index'],
                    }
                    readmail.append(sender)
                    print(' ├─{} {}'.format(title['index'], sender['name']))
                    time.sleep(0.002)
            title['index'] += 1
        except Exception as err:
            # Report the problem instead of stopping silently; a separate name
            # keeps the loop variable `e` from being shadowed.
            print(' 读取邮件列表失败: {}'.format(err))
            break

    # The original computed can_filp_1 but never used it, so paging could run
    # past the last page of the folder.
    if can_filp_1 and can_filp_2:
        pageInfo['isfilp'] = 1
        pageInfo['now'] += 1
        url_folder = url_qqmail + 'cgi-bin/mail_list?sid={}&folderid={}&page={}'.format(token['sid'], token['folderid'], pageInfo['now'] - 1)
    else:
        pageInfo['autofilp'] = False
#......................................................
# Process the attachments
#......................................................
time.sleep(1)
#os.system('cls')
print("\n")
print(" ---------------------------------------")
print(' 邮件主题({})'.format(title['index']-1))
print(" ---------------------------------------")
print(" ")
print(" 开始处理附件")

# Open every collected mail and read its attachment list.
# NOTE(review): the original statements were collapsed onto a single line; the
# nesting below is reconstructed from the logic and should be confirmed.
for key in readmail:
    time.sleep(2)
    url = url_qqmail + 'cgi-bin/frame_html?sid={}&url=/cgi-bin/readmail?mailid={}'.format(token['sid'], key['id'])
    chrome.get(url)
    try:
        chrome.switch_to.default_content()
        chrome.find_element_by_id("mainFrame")
    except Exception:
        # The frame is missing: the script treats this as rate limiting, waits
        # for the user and loads the mail again.
        print("您请求的频率太快，请稍后再试")
        os.system('pause')
        chrome.get(url)
    chrome.switch_to.default_content()
    chrome.switch_to.frame(chrome.find_element_by_id("mainFrame"))

    elements = chrome.find_elements_by_class_name("name_big")
    isFool = len(elements) <= 0  # the mail has no attachment
    print(' ├─{} {} {}'.format(key['index'], key['name'], (key['email'], '(没有附件)')[isFool]))

    if isFool:
        foolmail.append(key['name'])
        # Flag the mail with a star unless it is already flagged.
        mark_star = chrome.find_element_by_id("img_star")
        if mark_star.get_attribute("class") == 'qm_ico_flagoff':
            mark_star.send_keys(Keys.SPACE)
        continue

    # The subject is the same for every attachment of this mail: read it once.
    subject = chrome.find_element_by_id("subject").text
    for f in elements:
        attach = {
            'title': subject,
            'name': key['name'],
            'email': key['email'],
            'filename': f.find_element_by_css_selector('span:nth-child(1)').text,
        }
        filemail.append(attach)
        test['filecount'] += 1
        print(" │  ├─{}".format(attach['filename']))

    if DEBUG != 1:
        # Rewrite the rename script with every attachment seen so far. The
        # file is written with an explicit path and a context manager, so the
        # handle is closed even if a write fails.
        with open(os.path.join(download_here, "_ren.bat"), "w") as cmd:
            cmd.write("@echo off")
            cmd.write("\n")
            for item in filemail:
                cmd.write('ren "{}" "{}-{}"'.format(item['filename'], item['email'], item['filename']))
                cmd.write("\n")
            cmd.write("del _ren.bat")

        # Click every download link of the mail.
        for link in chrome.find_elements_by_link_text('下载'):
            link.click()
            test['downloadtimes'] += 1
            time.sleep(0.18)
        time.sleep(0.75)

print(' └─Emailecount:{}    Foolcount:{}    filecount:{}    downloadtimes:{}'.format(len(readmail), len(foolmail), test['filecount'],test['downloadtimes']))

精简版本：

# encoding:utf-8
import os
import urllib
import _thread
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import re
import datetime

# Directory for the downloaded attachments, and a sub-directory for text files.
download_here = 'E:\\emal'
download_here_txt = 'E:\\emal\\txt'
# Debug mode: with 1 nothing is downloaded, only the listing is printed.
DEBUG = 0
# Paging state of the mail-list loop ('autofilp' keeps the loop running).
pageInfo = {'now': 0, 'max': 0, 'step': 0, 'isfilp': 0, 'autofilp': True}
# title['index'] is the running number of the mail being read (starts at 1).
title = {'index': 1, 'start': 0, 'end': -1, 'step': -1}
# Counters used for testing, to make sure no file is missed.
test = {'fileindex': 0, 'filecount': 0, 'downloadtimes': 0}
# readmail  mail list: mail id (value), timestamp (totime), sender address (fa),
#           sender nickname (fn)
# filemail  attachment list: address, mail subject, sender nickname, file name
# foolmail  mails without attachments: sender nickname
readmail, filemail, foolmail = [], [], []
# Mailbox address
url_qqmail = 'https://mail.qq.com/'
#......................................................
# Make sure the download directories exist
#......................................................
for folder in (download_here, download_here_txt):
    if not os.path.exists(folder):
        print("文件夹不存在。正在自动创建文件夹....")
        # makedirs also creates missing parent directories, and exist_ok
        # avoids a failure if the directory appears between check and call.
        os.makedirs(folder, exist_ok=True)
#......................................................
# Configure the Web Driver
#......................................................
options = webdriver.ChromeOptions()
# Chrome preferences: the image content setting (2 presumably blocks images,
# TODO confirm) and the directory downloads are saved to.
prefs = {"profile.managed_default_content_settings.images": 2, "download.default_directory": download_here}
options.add_argument("user-data-dir=selenium")
options.add_experimental_option("prefs", prefs)
# "--window-max" is not a Chrome switch and is ignored by the browser; use
# "--start-maximized", the switch the full version of this script uses.
options.add_argument("--start-maximized")
options.add_argument('--ignore-certificate-errors')
chrome = webdriver.Chrome(r"C:\Users\Administrator\chromedriver.exe", options=options)

# Start the Web Driver and open the mailbox
print(" ")
print(" Chrome启动")
chrome.get(url_qqmail)
#chrome.implicitly_wait(3)
# Perform the mailbox login
# The login form is looked up inside the "login_frame" frame.
chrome.switch_to.frame("login_frame")
# Click the switch button first (presumably it selects the account/password
# form - TODO confirm against the login page).
chrome.find_element_by_class_name("switch").find_element_by_class_name("switch_btn").click()
chrome.find_element_by_id("uinArea").click()
email = chrome.find_element_by_name("u")
email.clear()
email.send_keys("user_name")                # user_name: mailbox account name
password = chrome.find_element_by_name("p")
password.send_keys("password")              # password: mailbox password
time.sleep(1)
chrome.find_element_by_id("login_button").click()
time.sleep(1)
print("after login------------------------")
# Print the title of the current page
print(chrome.title)
# Print the URL of the current page
print(chrome.current_url)

# Extract the session id from the URL. The original pattern r'sid=(.*?)&'
# needed a trailing '&' (it missed a sid at the end of the URL) and
# concatenated all matches; take the first "sid" query parameter instead.
sid_match = re.search(r'[?&]sid=([^&#]*)', chrome.current_url)
sid = sid_match.group(1) if sid_match else ""
if not sid:
    # Without a sid every later request would fail, so stop with a clear
    # reason instead of continuing with broken URLs.
    raise RuntimeError("No sid found in URL after login: {}".format(chrome.current_url))
print(sid)
# Address of the mail folder to read
url_folder = "https://mail.qq.com/cgi-bin/mail_list?sid={}&folderid=130".format(sid)
#......................................................
# Collect page information and the list of mails
#......................................................
# NOTE(review): the original statements were collapsed onto a single line; the
# nesting below is reconstructed from the logic and should be confirmed.
while pageInfo['autofilp']:
    chrome.get(url_folder)
    time.sleep(0.5)
    element = chrome.find_element_by_id("mainFrame")
    chrome.switch_to.frame(element)

    # Read the mail list. The first "mailid" input is skipped, as in the
    # original (presumably it is not a real mail entry - TODO confirm).
    elements = chrome.find_elements_by_css_selector('input[name="mailid"]')
    for e in elements[1:]:
        try:
            sender = {
                'timestamp': e.get_attribute('totime'),
                'name': e.get_attribute('fn'),
                'email': e.get_attribute('fa'),
                'id': e.get_attribute('value'),
                'index': title['index'],
            }
            readmail.append(sender)
            print(' ├─{} {}'.format(title['index'], sender['name']))
            time.sleep(1)
            title['index'] += 1
        except Exception as err:
            # Report the problem instead of stopping silently; a separate name
            # keeps the loop variable `e` from being shadowed.
            print(' 读取邮件列表失败: {}'.format(err))
            break

    # This version reads a single page only.
    pageInfo['autofilp'] = False
#......................................................
# Process the attachments
#......................................................
time.sleep(1)
#os.system('cls')
print("\n")
print(" ---------------------------------------")
print(' 邮件主题({})'.format(title['index']-1))
print(" ---------------------------------------")
print(" ")
print(" 开始处理附件")

# Open every collected mail and read its attachment list.
# NOTE(review): the original statements were collapsed onto a single line; the
# nesting below is reconstructed from the logic and should be confirmed.
for key in readmail:
    time.sleep(1)
    url = url_qqmail + 'cgi-bin/frame_html?sid={}&url=/cgi-bin/readmail?mailid={}'.format(sid, key['id'])
    chrome.get(url)
    try:
        chrome.switch_to.default_content()
        chrome.find_element_by_id("mainFrame")
    except Exception:
        # The frame is missing: the script treats this as rate limiting, waits
        # for the user and loads the mail again.
        print("您请求的频率太快，请稍后再试")
        os.system('pause')
        chrome.get(url)
    chrome.switch_to.default_content()
    chrome.switch_to.frame(chrome.find_element_by_id("mainFrame"))

    elements = chrome.find_elements_by_class_name("name_big")
    isFool = len(elements) <= 0  # the mail has no attachment
    print(' ├─{} {} {}'.format(key['index'], key['name'], (key['email'], '(没有附件)')[isFool]))

    if isFool:
        # The original never filled foolmail, so the "Foolcount" of the final
        # summary was always 0.
        foolmail.append(key['name'])
    else:
        # These values are the same for every attachment of this mail, so the
        # page is queried once instead of once per attachment.
        subject = chrome.find_element_by_id("subject").text
        mail_info = chrome.find_element_by_class_name("readmailinfo").text
        mail_text = chrome.find_element_by_xpath("//*[@id='contentDiv']").text
        mail_date = "".join(re.findall(r'(\d{4}年\d{2}月\d{2}日)', mail_info))
        mail_time = "".join(re.findall(r'(\d*:\d*)', mail_info))
        for f in elements:
            attach = {
                'title': subject,
                'name': key['name'],
                'email': key['email'],
                'filename': f.find_element_by_css_selector('span:nth-child(1)').text,
                'date': mail_date,
                'time': mail_time,
                'text': mail_text,
            }
            filemail.append(attach)
            test['filecount'] += 1
            print(" │  ├─{}".format(attach['filename']))

    if DEBUG != 1:
        # Click every download link of the mail.
        for link in chrome.find_elements_by_link_text('下载'):
            link.click()
            test['downloadtimes'] += 1
            time.sleep(1)

    # NOTE: the original carried a disabled (commented-out) draft here that
    # saved the mail text next to the attachment and prefixed downloaded files
    # with the mail date and time. It referenced undefined names and is left
    # unimplemented.

print(' └─Emailecount:{}    Foolcount:{}    filecount:{}    downloadtimes:{}'.format(len(readmail), len(foolmail), test['filecount'],test['downloadtimes']))

selenium 批量下载qq邮箱附件相关推荐

python批量下载qq邮箱文件
首先在邮箱中配置 POP3,配置成功后可以直接在安装有 Python 的 Windows 或 Linux 上运行代码,注意修改文件路径、邮箱名以及访问码!!! 代码如下: https://github.com/progragu ...
如何使用python批量下载-使用 Python + Selenium 批量下载素材
原文首发于CSDN,略有增删使用 Python + Selenium 批量下载素材本文简单介绍使用 Python + Selenium 从ManyPixels线上图库批量下载素材的方法.截止到现在 ...
python自动下载qq文件夹_GitHub - 1061700625/QQZone_AutoDownload_Album: Python+selenium 自动下载QQ空间相册...
QQZone_AutoDownload_Album Python+selenium 自动下载QQ空间相册 . selenium_firefox.zip 需要解压后放在同路径下 . 貌似腾讯的登陆加密做 ...
[实战篇]关于QQ邮箱附件上传功能之测试用例
今天,我们来分析关于QQ邮箱的最后一个功能模块:附件上传的测试用例. 上传文件的格式需求里的图片文件支持 jpg.gif.png.bmp 这四种文件格式,jpg就是平常的那种彩色图片,gif是动画的文 ...
selenium自动登录QQ邮箱(附带滑动解锁)
问题分析:登录+滑动解锁其实登录账号的部分本来很简单,用selenium打开QQ邮箱官网:https://mail.qq.com 然后切换frame输入帐号和密码点击登录即可,但是部分账号,或者可 ...
python中使用selenium模块登录QQ邮箱
直接上代码,需要安装selenium模块,各个函数作用可以百度. 注意:下面程序中需要替换你自己的账户和密码. #-*-coding:utf-8-*-from selenium import webd ...
Java实现发送QQ邮箱+附件
首先要先在QQ邮箱里开启服务超详细博客 ←点击这里 QQ邮箱–>邮箱设置–>账户>POP3/IMAP/SMTP/Exchange/CardDAV/CalDAV服务开启POP3/S ...
python发邮件代码要下载qq邮箱吗_python登录QQ邮箱发送QQ邮件代码
#encoding=utf-8 __author__ = 'ds' #文件名称冲突 from email.mime.text import MIMEText import ...
qq邮箱附件文件名乱码处理方法
IE的兼容性视图设置 ,把邮箱所在网址输入(QQ邮箱就输入 qq.com).一切OK了!

selenium 批量下载qq邮箱附件

selenium 批量下载qq邮箱附件相关推荐

最新文章

热门文章