目的

获取同程旅游(ly.com)酒店评价信息

详细需求

https://www.ly.com/HotelInfo-52003119.html?spm0=10002.2001.1.0.1.4.31

思路解析

一、F12

从这里已经可以看出数据的获取方式:请求链接 → 返回数据 → 提取数据,
因此这里需要做的就是构建请求。

二、请求分析

三、js调试








至此,所有需要的参数都已经找到,接下来根据需要把相关JS代码改写到本地进行调试。

四、本地JS调试

/**
 * Left-rotation amounts for MD5, one row per round; within a round the four
 * values repeat every four steps.
 */
const MD5_SHIFTS = [
  [7, 12, 17, 22],
  [5, 9, 14, 20],
  [4, 11, 16, 23],
  [6, 10, 15, 21],
];

/**
 * The 64 additive MD5 constants, T[i] = floor(2^32 * |sin(i + 1)|), stored as
 * signed 32-bit integers (-680876936, -389564586, 606105819, ...).
 */
const MD5_SINE_TABLE = Array.from(
  { length: 64 },
  (_, i) => Math.floor(Math.abs(Math.sin(i + 1)) * 0x100000000) | 0,
);

/**
 * Normalizes the message into an array of byte values.
 *
 * Strings are encoded as UTF-8, arrays are used as-is (each element is treated
 * as one byte), typed arrays / Buffers are copied byte by byte, and any other
 * value is stringified first.
 *
 * @param {*} message - Value to hash.
 * @returns {number[]|Uint8Array} Byte values of the message.
 * @throws {TypeError} If `message` is null or undefined.
 */
function md5MessageToBytes(message) {
  if (message == null) {
    throw new TypeError('token(): message must not be null or undefined');
  }
  if (Array.isArray(message)) {
    return message;
  }
  if (ArrayBuffer.isView(message)) {
    return new Uint8Array(message.buffer, message.byteOffset, message.byteLength);
  }
  return new TextEncoder().encode(String(message));
}

/**
 * Computes the 16-byte MD5 digest of a byte sequence.
 *
 * @param {ArrayLike<number>} bytes - Message bytes.
 * @returns {number[]} The 16 digest bytes.
 */
function md5Digest(bytes) {
  const bitLength = bytes.length * 8;
  // Room for the message, the 0x80 terminator and the 64-bit length field,
  // rounded up to whole 512-bit (16-word) blocks.
  const blockCount = ((bitLength + 64) >>> 9) + 1;
  const words = new Array(blockCount * 16).fill(0);

  // Pack the bytes into little-endian 32-bit words.
  for (let i = 0; i < bytes.length; i += 1) {
    words[i >>> 2] |= (bytes[i] & 0xff) << ((i % 4) * 8);
  }
  words[bitLength >>> 5] |= 0x80 << (bitLength % 32);
  // Only the low 32 bits of the length are stored, as in the original snippet.
  words[words.length - 2] = bitLength;

  let a = 1732584193;
  let b = -271733879;
  let c = -1732584194;
  let d = 271733878;

  for (let offset = 0; offset < words.length; offset += 16) {
    const startA = a;
    const startB = b;
    const startC = c;
    const startD = d;

    for (let step = 0; step < 64; step += 1) {
      const round = step >>> 4;
      let mixed;
      let wordIndex;
      if (round === 0) {
        mixed = (b & c) | (~b & d);
        wordIndex = step;
      } else if (round === 1) {
        mixed = (b & d) | (c & ~d);
        wordIndex = (5 * step + 1) % 16;
      } else if (round === 2) {
        mixed = b ^ c ^ d;
        wordIndex = (3 * step + 5) % 16;
      } else {
        mixed = c ^ (b | ~d);
        wordIndex = (7 * step) % 16;
      }

      const shift = MD5_SHIFTS[round][step % 4];
      const sum = (a + mixed + words[offset + wordIndex] + MD5_SINE_TABLE[step]) | 0;
      const rotated = (sum << shift) | (sum >>> (32 - shift));
      const nextB = (b + rotated) | 0;

      // Rotate the registers: (a, b, c, d) -> (d, nextB, b, c).
      a = d;
      d = c;
      c = b;
      b = nextB;
    }

    a = (a + startA) | 0;
    b = (b + startB) | 0;
    c = (c + startC) | 0;
    d = (d + startD) | 0;
  }

  // Serialize the four state words in little-endian byte order.
  const digest = [];
  for (const word of [a, b, c, d]) {
    for (let shift = 0; shift < 32; shift += 8) {
      digest.push((word >>> shift) & 0xff);
    }
  }
  return digest;
}

/**
 * Builds the `antitoken` request parameter: the lowercase hexadecimal MD5
 * digest of the given message (the caller passes a millisecond timestamp
 * string).
 *
 * @param {string|number[]|ArrayBufferView|*} e - Message to hash; strings are
 *   hashed as UTF-8, other non-array values are stringified first.
 * @returns {string} 32-character lowercase hex digest.
 * @throws {TypeError} If `e` is null or undefined.
 */
function token(e) {
  return md5Digest(md5MessageToBytes(e))
    .map((byte) => byte.toString(16).padStart(2, '0'))
    .join('');
}

// Local debugging demo: print the token for the current millisecond timestamp.
const timestamp = String(Date.now());
console.log(token(timestamp));

效果实现

在PyCharm中直接运行JS代码需要Node.js环境支持;如果尚未安装,请先安装Node.js(可自行搜索"PyCharm 安装 Node.js"的配置方法)。


五、思路汇总

1.获取token值
2.构建请求链接
3.提取评价数据

源码实现

import time

import execjs
import requests

# Local JavaScript file that defines token(), used to compute the antitoken.
JS_FILE = 'tongchenglvxing.js'

# Review-list endpoint; the hotel id, page number and antitoken are filled in.
COMMENT_URL = (
    "https://www.ly.com/hotel/api/tmapi/comment/list/"
    "?hotelid={hotel_id}&page={page}&pageSize=10&commentType=0&roomTypeId="
    "&tripPurposeId=&RankType=1&mainTagId=&subTagId=&antitoken={token}"
)

# The Referer must point at the page of the same hotel that is being queried.
REFERER_URL = "https://www.ly.com/HotelInfo-{hotel_id}.html?spm0=10002.2001.1.0.1.4.31"

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
)

# Captured browser cookie, split around the `wangba` value, which is replaced
# with the same millisecond timestamp that is hashed into the antitoken.
COOKIE_PREFIX = (
    'Hm_lvt_64941895c0a12a3bdeb5b07863a52466=1602295824; '
    'Hm_lpvt_64941895c0a12a3bdeb5b07863a52466=1602295824; '
    '17uCNRefId=RefId=6928722&SEFrom=baidu&SEKeyWords=; '
    'CNSEInfo=RefId=6928722&tcbdkeyid=&SEFrom=baidu&SEKeyWords=&RefUrl='
    'https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DuFsbm_4VERms_CCVvyn5vR74d_j8_i5hmKmmvPhj_5S'
    '%26wd%3D%26eqid%3D83e1960b000aba54000000065f811809; '
    'TicketSEInfo=RefId=6928722&SEFrom=baidu&SEKeyWords=; '
    '__tctmu=144323752.0.0; '
    '__tctmz=144323752.1602295823130.1.1.utmccn=(organic)|utmcmd=organic'
    '|utmEsl=gb2312|utmcsr=baidu|utmctr=; '
    'longKey=1602295823689244; __tctrack=0; qdid=-9999; '
    'Hm_lvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1602295833; '
    'Hm_lvt_f97c1b2277f4163d4974e7b5c8aa1e96=1602295834; '
    'wangba='
)
COOKIE_SUFFIX = (
    '; firsttime=1602295835354; sug_act_info=; __tctmc=144323752.9037491; '
    'trace_token=; __tctmd=144323752.737325; __tccgd=144323752.0; '
    'route=95e291ab3c2e10cfb4323494ebf2a6bd; '
    'Hm_lpvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1602310138; '
    'Hm_lpvt_f97c1b2277f4163d4974e7b5c8aa1e96=1602310139; '
    'User-Ref-SessionId=fc5c-14fa-7bf9-14e9-015a-1d1e; '
    'trace_extend={"deviceid":"1602295823689244","appid":"1",'
    '"userid":"1602295823689244","orderfromid":"57000",'
    '"sessionid":"fc5c-14fa-7bf9-14e9-015a-1d1e","pvid":"4cea3129"}; '
    '__tctma=144323752.1602295823689244.1602295823130.1602305312867.1602310135480.5; '
    '__tctmb=144323752.881068342023288.1602310135480.1602310135480.1; '
    'lasttime=1602310899814'
)


def generate_token(timestamp_ms):
    """Run token() from the local JS file on the timestamp and return its result."""
    with open(JS_FILE, 'r', encoding='utf-8') as f:
        ctx = execjs.compile(f.read())
    return ctx.call('token', str(timestamp_ms))


def fetch_comments(hotel_id="52003119", page=2, timeout=10):
    """Fetch one page of reviews for a hotel.

    Args:
        hotel_id: Hotel id as it appears in the HotelInfo page URL.
        page: Page number of the review list.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list found at response -> body -> dpList in the JSON payload, or
        an empty list when any of those keys is missing or empty.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    timestamp_ms = int(time.time() * 1000)
    url = COMMENT_URL.format(
        hotel_id=hotel_id, page=page, token=generate_token(timestamp_ms)
    )
    headers = {
        "User-Agent": USER_AGENT,
        "Referer": REFERER_URL.format(hotel_id=hotel_id),
        "Cookie": COOKIE_PREFIX + str(timestamp_ms) + COOKIE_SUFFIX,
    }

    # TLS verification is left enabled (the original passed verify=False).
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    # Walk response -> body -> dpList defensively: a rejected token may yield
    # a payload without these keys.
    if not isinstance(payload, dict):
        return []
    body = (payload.get('response') or {}).get('body') or {}
    return body.get('dpList') or []


if __name__ == "__main__":
    for comment in fetch_comments():
        print(comment.get('dpContent'))

效果实现

参考文章
作者:不吃夹生饭
https://zhuanlan.zhihu.com/p/54627024

python爬虫进阶-同城旅游酒店评价(JS逆向)相关推荐

  1. python爬虫进阶-1688工厂信息(JS逆向-sign签名验证)

    目的 获取1688工厂名片的相关信息 详细需求 一.进入1688网站 https://www.1688.com/ 二.使用"工厂"这个搜索框 三.输入工厂名称进行搜索,如" ...

  2. 《封号码罗》python爬虫之企某科技网站js逆向(十四)

    首先查看ajax加载,可以发现,其所有的数据都是加密方式到前端页面,由前端页面js解密之后再渲染到网页中 根据其关键字 encrypt_data进行全局搜索,寻找js的解密代码 这个地方就是解密代码, ...

  3. 大家沉迷短视频无法自拔?Python爬虫进阶,带你玩转短视频

    大家好,我是辣条. 现在短视频可谓是一骑绝尘,吃饭的时候.休息的时候.躺在床上都在刷短视频,今天给大家带来python爬虫进阶 :美拍视频地址加密解析. 短视频js逆向解析 抓取目标 工具使用 重点学 ...

  4. Python 爬虫进阶必备 | 某音乐网站查询参数加密逻辑分析(分离式 webpack 的加密代码扣取详解)...

    点击上方"咸鱼学Python",选择"加为星标" 第一时间关注Python技术干货! 今日网站 aHR0cDovL3d3dy5rdXdvLmNuL3NlYXJj ...

  5. Python爬虫进阶五之多线程的用法

    前言 我们之前写的爬虫都是单个线程的?这怎么够?一旦一个地方卡到不动了,那不就永远等待下去了?为此我们可以使用多线程或者多进程来处理. 首先声明一点! 多线程和多进程是不一样的!一个是 thread ...

  6. Python爬虫进阶——urllib模块使用案例【淘宝】

    Python爬虫基础--HTML.CSS.JavaScript.JQuery网页前端技术 Python爬虫基础--正则表达式 Python爬虫基础--re模块的提取.匹配和替换 Python爬虫基础- ...

  7. python爬虫进阶-每日一学(字体反爬-移花接木)

    目的 分析与学习更多的字体反爬套路 详细需求 url:http://glidedsky.com/level/web/crawler-font-puzzle-2 思路解析 一.审查 二.分析 impor ...

  8. 一文看懂Python 爬虫 进阶(三)

    一文看懂Python 爬虫 进阶(三) 文章目录 一文看懂Python 爬虫 进阶(三) **猫眼电影(xpath)** **链家二手房案例(xpath)** **百度贴吧图片抓取** 这篇几乎都是代 ...

  9. python爬虫项目实战教学视频_('[Python爬虫]---Python爬虫进阶项目实战视频',)

    爬虫]---Python 爬虫进阶项目实战 1- Python3+Pip环境配置 2- MongoDB环境配置 3- Redis环境配置 4- 4-MySQL的安装 5- 5-Python多版本共存配 ...

最新文章

  1. 采集练习(一) php 获得全国的小学(数据来自腾讯朋友网)
  2. 一个普通80后的IT Pro去溜冰的感慨
  3. 贪婪算法、递归计算、动态规划背包问题
  4. Python实训day08pm【面试和职场的经验分享】
  5. tensorboard 使用教程
  6. SpringSecurity 权限控制之开启动态权限注解支持
  7. 22.PATH环境变量
  8. C++11 多线程库使用说明
  9. 限制RICHTEXTBOX的输入的范围
  10. python request headers获取_Python爬虫实战—— Request对象之header伪装策略
  11. 940mx黑苹果驱动_黑苹果 Clover 驱动配置文件分享
  12. 计算机发展的新技术,计算机新技术发展的有关论文
  13. FineReport程序网络报表 - Hello,World
  14. [数图] 实验四 图像的滤波处理与图像空间变换
  15. Markdown部分语法使用
  16. 2022-2028全球与中国数据管理平台(DMP)市场现状及未来发展趋势
  17. Request库的相关用法
  18. 什么是JDBC,JDBC是干嘛用的?
  19. 阿里云神龙团队拿下 TPCx-BB 排名第一的背后技术
  20. java多文件压缩为ZIP

热门文章

  1. 201521123037 《Java程序设计》第7周学习总结
  2. IT小盆友:注意20种习惯最耗元气
  3. 【VMCloud云平台】SCAP(四)连接公有云(二)
  4. 扩视教育 | 机器视觉培训大纲labview
  5. 请问肾阴虚吃什么药?饮食注意什么?还有桂附地黄丸是治肾阴虚还是治肾阳虚的?谢谢...
  6. oracle11g自动内存管理好吗,Oracle11G新特性的研究之【自动内存管理】
  7. msray网址采集软件即将上线国外搜素引擎-QWANT
  8. 【小程序模板】功能模块+红色招聘信息资讯小程序网页模板+行业职位招聘小程序+招聘信息网页下载
  9. 2022-2028全球及中国葡萄种植市场研究报告
  10. HTTP协议:三.HTTP 报文信息