爬虫实战：12306登录

爬虫实战：破解点触验证码，实现12306登录

1.目标

实现12306登录，获取登录cookies

2.技术点

1.借用第三方打码平台，进行图片验证码识别
2.破解selenium webdriver反爬

3.思路

1.输入账号密码
2.获取验证图片
3.识别图片，获取坐标
4.图片验证
5.登录
6.滑动滑块

4.环境

python + selenium + 超级鹰

5.代码

1、12306登录.py

# @author: zly
# @function: Touch verification code
# @time: 2020-09-15
# @copyright: All Rights Reversed

import random
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

from chaojiying import Chaojiying_Client
from constants import (
    CODETIPE,
    CPASSWORD,
    CUSERNAME,
    DISTANCE,
    IMPLICITLYWAIT,
    PASSWORD,
    PATH,
    SEGMENTNUM1,
    SEGMENTNUM2,
    SOFTID,
    TRACKMAXLENGTH,
    USERNAME,
)

# NOTE(review): the find_element_by_* helpers used below belong to the
# Selenium 3 API and are understood to be removed in Selenium 4.3+ -- confirm
# the pinned Selenium version before upgrading.


class MakeTrack:
    """Build a list of horizontal offsets that add up to ``distance``.

    The offsets are produced from random accelerations and time steps so the
    resulting drag does not move at a constant speed.
    """

    # Upper bound on how many times a track is regenerated when it has more
    # than TRACKMAXLENGTH pieces; keeps make_track from looping forever.
    _MAX_TRIES = 50

    def __init__(self, distance=DISTANCE):
        self.distance = distance

    def segmentate(self, s):
        """Split one displacement into a list of smaller pieces.

        :param s: displacement to split, int
        :return: two pieces when SEGMENTNUM1 <= |s| < SEGMENTNUM2, five pieces
            when |s| >= SEGMENTNUM2, otherwise ``[round(s)]``. The pieces do
            not necessarily sum to ``s``; make_track compensates afterwards.
        """
        size = abs(s)
        if SEGMENTNUM1 <= size < SEGMENTNUM2:
            third = round(s / 3)
            return [third - 3, third + 3]
        if size >= SEGMENTNUM2:
            fifth = round(s / 5)
            return [fifth - 5, fifth - 3, fifth, fifth + 3, fifth + 5]
        return [round(s)]

    def make_track(self):
        """Return a list of offsets whose sum equals ``self.distance``.

        A track with more than TRACKMAXLENGTH pieces is regenerated (at most
        ``_MAX_TRIES`` times); if none is short enough, the shortest track
        seen is returned.

        :raises ValueError: if ``self.distance`` is negative.
        """
        if self.distance < 0:
            raise ValueError("distance must be non-negative")

        shortest = None
        for _ in range(self._MAX_TRIES):
            track = self._generate_once()
            if len(track) <= TRACKMAXLENGTH:
                return track
            if shortest is None or len(track) < len(shortest):
                shortest = track
        return shortest

    def _generate_once(self):
        """Generate a single candidate track that sums to ``self.distance``."""
        track = []
        current = 0
        velocity = 0
        while self.distance > current:
            # Random time step and acceleration give a random displacement.
            t = random.randint(1, 4) / 2
            a = random.randint(1, 3)
            velocity += a * t
            s = velocity * t + 0.5 * a * t ** 2
            # Large displacements are split into several smaller pieces.
            track.extend(self.segmentate(round(s)))
            current += s

        # Compensate for overshoot / undershoot until the sum is exact.
        while True:
            total = sum(track)
            if total == self.distance:
                break
            if total > self.distance:
                track.pop()
            else:
                track.append(self.distance - total)
        return track


class Login12306(Chaojiying_Client):
    """Log in to 12306 by solving the click captcha via Chaojiying.

    Settings are taken from the first source that is available:

    1. a ``configs`` dict passed as a keyword argument,
    2. the constants module (used when ``USERNAME`` is non-empty),
    3. the explicit constructor arguments.

    :param username: 12306 account, str
    :param password: 12306 password, str
    :param cusername: Chaojiying account, str
    :param cpassword: Chaojiying password, str
    :param soft_id: Chaojiying software ID, str
    :param codetype: captcha type code, int
    :param path: file path the captcha screenshot is written to, str

    Constructing an instance immediately runs the login flow. The outcome is
    stored in ``self.logged_in`` and, on success, the browser cookies are
    stored in ``self.cookies``.
    """

    def __init__(self, username=None, password=None,
                 cusername=None, cpassword=None, soft_id=None,
                 codetype=None, path=None,
                 *args, **kwargs):
        configs = kwargs.get('configs')
        if configs:
            # Settings passed as a dict.
            username = configs.get('username', '')
            password = configs.get('password', '')
            cusername = configs.get('cusername', '')
            cpassword = configs.get('cpassword', '')
            soft_id = configs.get('soft_id', '')
            codetype = configs.get('codetype', '')
            path = configs.get('path', '')
        elif USERNAME:
            # Settings taken from the constants module.
            username = USERNAME
            password = PASSWORD
            cusername = CUSERNAME
            cpassword = CPASSWORD
            soft_id = SOFTID
            codetype = CODETIPE
            path = PATH
        elif None in (username, password, cusername, cpassword):
            raise ValueError(
                "no credentials supplied: pass configs=..., fill in the "
                "constants module, or give username/password/cusername/"
                "cpassword explicitly"
            )

        # Initialise the Chaojiying client in every branch; PostPic and
        # ReportError rely on the request parameters it prepares.
        super().__init__(username=cusername,
                         password=cpassword,
                         soft_id=soft_id)

        # These deliberately overwrite the username/password attributes set
        # by the parent with the 12306 credentials, as the original did.
        self.username = username
        self.password = password
        self.cusername = cusername
        self.cpassword = cpassword
        self.soft_id = soft_id
        self.codetype = codetype
        self.path = path

        self.cookies = None
        # ``run`` is a property: reading it performs the whole login flow.
        self.logged_in = self.run

    @property
    def run(self):
        """Perform the full login flow and return True on success.

        Reading this property opens a browser, fills in the credentials,
        solves the captcha, clicks login and drags the slider. A failed
        captcha check refreshes the page and tries again; if the recognition
        service returns no coordinates twice in a row the browser is closed
        and False is returned.
        """
        self.driver = self.prepares()
        self.driver.get('https://kyfw.12306.cn/otn/resources/login.html')
        self.driver.implicitly_wait(IMPLICITLYWAIT)
        self.driver.maximize_window()
        time.sleep(1)

        # 1. enter credentials, 2. grab the captcha image
        self._fill_form_and_capture()

        while True:
            # 3. recognise the image and get the click coordinates
            position, pic_id = self.get_position(codetype=self.codetype)
            if not position:
                position, pic_id = self.get_position(codetype=self.codetype)
            if not position:
                # Clicking with no coordinates would raise; stop instead.
                print("验证码识别失败，登录终止")
                self.driver.quit()
                return False

            # 4. click the captcha, 5. submit the login form
            self.img_click(position)
            if self.login(pic_id):
                # 6. drag the slider
                return bool(self.slide())

            self.driver.refresh()
            self._fill_form_and_capture()

    def _fill_form_and_capture(self):
        """Enter the 12306 credentials and screenshot the current captcha."""
        self.input_user_pwd(username=self.username, password=self.password)
        self.get_pic()

    def prepares(self):
        """Create a Chrome driver with the automation markers hidden.

        The automation switch and extension are disabled, and a script is
        registered that makes ``navigator.webdriver`` return ``undefined`` on
        every new document. The original author notes that 12306 rejects the
        login otherwise.
        """
        options = webdriver.ChromeOptions()
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        driver = webdriver.Chrome(options=options)
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
        )
        return driver

    def input_user_pwd(self, username=None, password=None):
        """Switch to account login and type the 12306 credentials.

        :param username: 12306 account, str
        :param password: 12306 password, str
        :return: 0 (carries no meaning)
        """
        # Switch to the account/password tab.
        self.driver.find_element_by_xpath('//li[@class="login-hd-account"]/a').click()
        # The original author reports errors without this pause while the
        # page's JavaScript is still loading.
        time.sleep(2)
        self.driver.find_element_by_id('J-userName').send_keys(username)
        self.driver.find_element_by_id('J-password').send_keys(password)
        return 0

    def get_pic(self):
        """Screenshot the captcha element to ``self.path``.

        :return: 0 (carries no meaning)
        """
        self.driver.find_element_by_id('J-loginImg').screenshot(self.path)
        return 0

    def get_position(self, codetype=None):
        """Send the captcha image to Chaojiying and parse the coordinates.

        :param codetype: captcha type code, int
        :return: ``[position, pic_id]`` where ``position`` is a nested list
            such as ``[['55', '55'], ['88', '88']]``, or None when the service
            reports an error or returns no coordinates (the picture is then
            reported back through ReportError).
        """
        verify_data = self.PostPic(self.path, codetype)
        print(verify_data)

        if verify_data['err_no'] == 0 and verify_data['pic_str']:
            # pic_str has the form "x1,y1|x2,y2|..."
            position = [pair.split(',') for pair in verify_data['pic_str'].split('|')]
            return [position, verify_data['pic_id']]

        self.ReportError(verify_data['pic_id'])
        return [None, verify_data['pic_id']]

    def img_click(self, position):
        """Click the captcha image at each of the given coordinates.

        :param position: nested list of coordinates,
            e.g. ``[['55', '55'], ['88', '88']]``
        :return: 0 (carries no meaning)
        """
        element = self.driver.find_element_by_id('J-loginImg')
        for point in position:
            # The offsets are converted because they arrive as strings.
            x = int(point[0])
            y = int(point[1])
            ActionChains(self.driver).move_to_element_with_offset(element, x, y).click().perform()
        return 0

    def login(self, pic_id=None):
        """Click the login button and check whether the captcha was accepted.

        :param pic_id: Chaojiying picture ID, reported back on failure
        :return: False when the captcha error hint is displayed, else True
        """
        self.driver.find_element_by_id('J-login').click()
        verify_tag = self.driver.find_element_by_xpath('//*[@class="lgcode-error"]')
        # A visible error hint means the captcha check failed.
        if verify_tag.is_displayed():
            # Report the wrong answer back to Chaojiying.
            self.ReportError(pic_id)
            print("图片验证失败，报错成功")
            return False
        print("图片验证成功")
        time.sleep(3)
        return True

    def slide(self):
        """Drag the slider, confirm the login and close the browser.

        :return: True when the confirmation button could be clicked and the
            cookies were read (they are stored in ``self.cookies`` and
            printed), otherwise False. The browser is always closed after a
            10 second pause.
        """
        try:
            return self._drag_and_confirm()
        finally:
            time.sleep(10)
            self.driver.quit()

    def _drag_and_confirm(self):
        """Perform the slider drag and the final confirmation click."""
        try:
            element = self.driver.find_element_by_id('nc_1_n1z')
            track = MakeTrack().make_track()
            ActionChains(self.driver).click_and_hold(element).perform()
            for offset in track:
                ActionChains(self.driver).move_by_offset(offset, 0).perform()
            ActionChains(self.driver).release(element).perform()
            # How long this needs depends on network speed.
            time.sleep(5)
        except Exception as e:
            # The original author observed occasional "stale element
            # reference" errors here when the page refreshes mid-drag.
            print(str(e))
            return False

        # Check whether the login went through.
        try:
            self.driver.find_element_by_xpath('//*[@class="btn btn-primary ok"]').click()
            cookies = self.driver.get_cookies()
        except Exception as e:
            print(str(e))
            print("恭喜您登陆失败，再来一次吧")
            return False

        self.cookies = cookies
        print("恭喜您登陆成功")
        print(cookies)
        return True


configs = {
    'username': '',      # 12306 account
    'password': '',      # 12306 password
    'cusername': '',     # Chaojiying account
    'cpassword': '',     # Chaojiying password
    'soft_id': '',       # software ID
    'codetype': 9004,    # captcha type
    'path': ''           # captcha image path
}

if __name__ == '__main__':
    Login12306(configs=configs)

2、chaojiying.py

import requests
from hashlib import md5


class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha recognition service.

    :param username: Chaojiying account name
    :param password: Chaojiying password in plain text; only its MD5 hex
        digest is stored and sent
    :param soft_id: Chaojiying software ID
    :param timeout: seconds to wait for each HTTP request before requests
        raises a timeout error; pass None to wait indefinitely
    """

    def __init__(self, username, password, soft_id, timeout=60):
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Parameters sent with every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, path, codetype):
        """Upload a captcha image and return the decoded JSON response.

        :param path: path of the image file to upload
        :param codetype: captcha type code, see
            http://www.chaojiying.com/price.html
        """
        with open(path, 'rb') as f:
            imagecontent = f.read()
        params = {'codetype': codetype}
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', imagecontent)}
        # Without a timeout a stalled upload would block the caller forever.
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php',
                          data=params, files=files, headers=self.headers,
                          timeout=self.timeout)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised picture and return the JSON response.

        :param im_id: picture ID of the captcha being reported
        """
        params = {'id': im_id}
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php',
                          data=params, headers=self.headers,
                          timeout=self.timeout)
        return r.json()

3、constants.py

# 12306 account
USERNAME = ""

# 12306 password
PASSWORD = ""

# Chaojiying account
CUSERNAME = ""

# Chaojiying password
CPASSWORD = ""

# Software ID
SOFTID = ""

# Captcha type (the spelling of this name is relied on by the login script)
CODETIPE = ""

# Path of the captcha image
PATH = ""

# Distance the slider is dragged, in px
DISTANCE = 425

# Size thresholds used when splitting a track
SEGMENTNUM1, SEGMENTNUM2 = 30, 50

# Maximum number of pieces in a track
TRACKMAXLENGTH = 30

# Wait time passed to the driver's implicit wait, in seconds
IMPLICITLYWAIT = 10

温馨提示：千万不要干坏事喲~~，否则抓进局里后果自负…

爬虫实战：12306登录相关推荐

python 12306登录_python爬虫--模拟12306登录
模拟12306登录超级鹰: #!/usr/bin/env python # coding:utf-8 import requests from hashlib import md5 class Ch ...
python 12306登录 2019_python爬虫--模拟12306登录
模拟12306登录超级鹰: #!/usr/bin/env python # coding:utf-8 import requests from hashlib import md5 class Ch ...
python成绩查询系统_Python爬虫实战：登录教务系统查成绩
本文记录我用Python登录教务系统查询成绩的过程.手动输入验证码,简单获取成绩页面.后续将可能更新自动识别验证码登录查询前期准备本爬虫用到了Python的Requests库和BeautifulS ...
python爬虫登录12306失败_使用python爬虫模拟12306登录方法
试了好久登录的时候总是显示:系统忙,请刷新,,,太折磨人了,搞了半天才想到是请求头部的问题..... 验证码还是要人工识图..#!/bin/env python # -*- coding=utf-8 ...
Python3 爬虫实战 — 模拟登陆12306【点触验证码对抗】
登陆时间:2019-10-21 实现难度:★★★☆☆☆ 请求链接:https://kyfw.12306.cn/otn/resources/login.html 实现目标:模拟登陆中国铁路12306,攻 ...
Python爬虫实战（5）：模拟登录淘宝并获取所有订单
Python爬虫入门(1):综述 Python爬虫入门(2):爬虫基础了解 Python爬虫入门(3):Urllib库的基本使用 Python爬虫入门(4):Urllib库的高级用法 Python爬虫 ...
Python爬虫实战之（五）| 模拟登录wechat
作者:xiaoyu 微信公众号:Python数据科学知乎:Python数据分析师不知何时,微信已经成为我们不可缺少的一部分了,我们的社交圈.关注的新闻或是公众号.还有个人信息或是隐私都被绑定在了一 ...
Python爬虫实战之（五）| 模拟登录wechat 1
作者:xiaoyu 微信公众号:Python数据科学知乎:Python数据分析师不知何时,微信已经成为我们不可缺少的一部分了,我们的社交圈.关注的新闻或是公众号.还有个人信息或是隐私都被绑定在了一 ...
爬虫三（Bs4搜索、Selenium基本使用、无界面浏览器、Selenium自动登录百度案例、自动获取12306登录验证码案例、切换选项卡、浏览器前进后退、登录Cnblogs获取Cookie自动点赞）
文章标题一.Bs4搜索文档树二.CSS选择器三.selenium基本使用四.无界面浏览器五.selenium其他使用 1)自动登录百度案例 2)获取位置属性大小.文本 3)自动获取12306 ...

爬虫实战：12306登录

爬虫实战：破解点触验证码，实现12306登录

1.目标

2.技术点

3.思路

4.环境

5.代码

温馨提示：千万不要干坏事喲~~，否则抓进局里后果自负…

爬虫实战：12306登录相关推荐

最新文章

热门文章