Python爬虫爬取携程国际机票航班信息,返回JSON串
# -*- coding: utf-8 -*-
import requests, json
import hashlib
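import re

# The parameter passed to get_index() is JSON with these fields: departure
# three-letter code, arrival three-letter code, departure date, number of
# adults, number of children, number of infants. Pass the three-letter codes
# in lowercase.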
# Browser-like User-Agent sent with both requests.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
)
_SEARCH_PAGE_URL = (
    'https://flights.ctrip.com/international/search/oneway-{}-{}'
    '?depdate={}&cabin=y_s&adult={}&child={}&infant={}'
)
_BATCH_SEARCH_URL = "https://flights.ctrip.com/international/search/api/search/batchSearch"


def get_index(paramter, timeout=30):
    """Fetch the one-way search page and extract its embedded search criteria.

    Args:
        paramter: Either a dict or a JSON string (useful when this is called
            from a service) with the keys ``dep_code``, ``arr_code``,
            ``date``, ``adult``, ``child`` and ``infant``. The accompanying
            usage note asks for the three-letter codes in lowercase.
        timeout: Seconds to wait for the HTTP response before requests
            raises a timeout error.

    Returns:
        A ``(post_dict, date)`` tuple: the object parsed from the page's
        ``GlobalSearchCriteria = ...;`` assignment, and the ``date`` value
        taken from ``paramter``.

    Raises:
        KeyError: If a required key is missing from ``paramter``.
        IndexError: If the page does not contain ``GlobalSearchCriteria``.
    """
    # Accept the raw JSON string as well as an already-decoded dict, so the
    # same function works for both script and service use.
    if isinstance(paramter, (str, bytes, bytearray)):
        paramter = json.loads(paramter)

    date = paramter["date"]
    urls = _SEARCH_PAGE_URL.format(
        paramter["dep_code"],
        paramter["arr_code"],
        date,
        paramter["adult"],
        paramter["child"],
        paramter["infant"],
    )

    # The header name must be 'user-agent'; the previous dict sent a header
    # literally called 'headers', so no User-Agent was ever set.
    headers = {'user-agent': _USER_AGENT}
    response = requests.get(urls, headers=headers, timeout=timeout)

    matches = re.findall(r'GlobalSearchCriteria =(.+);', response.text)
    if not matches:
        # Same exception type as the original bare [0] lookup, but with
        # enough context to diagnose a blocked or changed page.
        raise IndexError(
            "GlobalSearchCriteria not found in response from {}".format(urls)
        )
    post_dict = json.loads(matches[0])
    return post_dict, date


def get_data(post_dicts, timeout=30):
    """POST the search criteria to the batch-search API and return the body.

    Args:
        post_dicts: The ``(post_dict, date)`` pair returned by ``get_index``.
            ``post_dict`` is updated in place with the extra fields added to
            the payload below.
        timeout: Seconds to wait for the HTTP response before requests
            raises a timeout error.

    Returns:
        The response body as text.
    """
    post_dict = post_dicts[0]
    date = post_dicts[1]

    trans_id = post_dict['transactionID']
    first_segment = post_dict['flightSegments'][0]

    # The 'sign' header is the MD5 hex digest of:
    # transaction id + departure city code + arrival city code + date.
    sign_value = (
        trans_id
        + first_segment['departureCityCode']
        + first_segment['arrivalCityCode']
        + date
    )
    sign = hashlib.md5(sign_value.encode('utf-8')).hexdigest()

    # Extra top-level fields added to the payload, copied from the first
    # flight segment or set to fixed values.
    post_dict.update({
        "flightWayEnum": "OW",
        "arrivalProvinceId": first_segment["arrivalProvinceId"],
        "arrivalCountryName": first_segment["arrivalCountryName"],
        "cabinEnum": post_dict["cabin"],
        "departCountryName": first_segment["departureCountryName"],
        "segmentNo": len(post_dict["flightSegments"]),
        "departureCityId": first_segment["departureCityId"],
        "isMultiplePassengerType": 0,
    })

    # No hard-coded content-length: the payload size varies per query and
    # requests derives the header from the body.
    headers = {
        'origin': "https://flights.ctrip.com",
        'sign': sign,
        'user-agent': _USER_AGENT,
        'content-type': "application/json;charset=UTF-8",
        'accept': "application/json",
        'transactionid': trans_id,
        'Host': "flights.ctrip.com",
        'Connection': "keep-alive",
        'cache-control': "no-cache",
    }
    response = requests.post(
        _BATCH_SEARCH_URL,
        data=json.dumps(post_dict),
        headers=headers,
        timeout=timeout,
    )
    return response.text


if __name__ == '__main__':
    para = {"dep_code": "bbk", "arr_code": "bjs", "date": "2019-10-25", "adult": 1, "child": 2, "infant": 1}
    post_dicts = get_index(para)
    print(get_data(post_dicts))
python爬虫爬取携程国际机票航班信息,返回json串相关推荐
- python中飞机票购买程序_「最低折扣机票查询」Python 爬取携程所有机票找出最低折扣机票,让你无忧回家过年 - seo实验室...
最低折扣机票查询 前言 平时出行,大多数人都选择坐高铁。对于距离比较近的行程,这当然是最划算的;但对于路途遥远的人而言,提前购买的飞机票价格可能比高铁票更加便宜。如果我们可以爬取机票数据并分 ...
- Python爬虫爬取链家网上的房源信息练习
一 原链接:用Python爬虫爬取链家网上的房源信息_shayebuhui_a的博客-CSDN博客_python爬取链家 打开链家网页:https://sh.lianjia.com/zufang/ ...
- python爬虫爬取微信公众号小程序信息
python爬虫爬取微信公众号小程序信息 爬取内容 某汽车维修信息提供的维修店名称,地点以及电话(手机)号码 爬取步骤 啥也别管,先抓包看看,在这里,博主使用的抓包软件是charles 抓包:将网络传 ...
- 爬虫 — 爬取携程的航班信息
功能介绍:输入起点、终点、时间就能得到携程上的航班信息 代码: from prettytable import PrettyTable import requests import json def x ...
- JAVA爬虫爬取携程酒店数据selenium实现
在爬取携程的时候碰到很多的壁垒,接下来分析所有过程 1.根据以往经验最初想到用jsoup去解析每个HTML元素,然后拿到酒店数据,然后发现解析HTML根本拿不到id为hotel_list的div,所以 ...
- python爬虫——爬取拉勾上的职位信息
爬取拉勾网站岗位数据 1.调用网页 查找网页链接规律 写一个for循环,爬取每一个网页的职位信息 def down():for i in range(1,4):if i == 1:strUrl = & ...
- python爬虫-爬取微博转评赞data信息
利用python简单爬取新浪微博(转发/评论/点赞/blog文本)信息 import requests import json from jsonpath import jsonpath import ...
- Python 爬取携程所有机票
打开携程网,查询机票,如广州到成都. 这时网址为:http://flights.ctrip.com/booking/CAN-CTU-day-1.html?DDate1=2018-06-15 其中,CA ...
- python bs4 csv requests 爬虫 爬取携程火车票网址信息并保存
目的:爬取携程网址 火车 中的单程与中转 单程 url="https://trains.ctrip.com/trainbooking/search?tocn=%25e5%258d%2583% ...
最新文章
- oracle触发器超过上限,ORACLE系统触发器的疑问,限制IP登录数
- SpringCloud -创建统一的依赖管理
- 线程池及并发编程基础总结
- 美国OCC代理署长Brian Brooks将于今日离任,由首席运营官接任
- [系列教程] Discuz模板的制作方法
- Apache2 httpd.conf中文版
- Android斗地主源码实现
- php上传虚假图片,解决PHP上传多个图片并校验的代码问题
- Codeforces 474 D. Flowers
- 数据资产管理:大数据时代的新风口
- ubuntu服务器开放端口_Docker服务开放了这个端口,服务器分分钟变肉机
- 谁说Mac系统下不能搞科研?MacOS科研软件汇总
- 樱花大战资源分析之二 SPR文件格式不完全分析
- 下午茶,几个笑话提提神
- D3 二维图表的绘制系列(十六)矩形树状图
- 只用十行 Python 代码就提取了韦小宝的身份证信息
- 查看字符的所占字节数
- 【智能商务】海量商品查找利器—苏宁搜索系统
- java中调用谷歌的无界浏览器对页面元素进行截图
- 我得了一种很奇怪的病...