废话就不说了,咱们直接上代码

def startGetData(self, rounds=3, delay=2):
    """Poll the Tencent News unread-list endpoint and parse each response.

    Every round rebuilds ``self.url`` (the value returned by
    ``self.getRondomStr()`` is appended to the ``__qnr=208e`` query
    parameter), sleeps for ``delay`` seconds, and then passes the URL, the
    body from ``self.getBody()`` and the headers from ``self.getHeader()``
    to ``self.parse_url``.

    Args:
        rounds: Number of requests to issue. Defaults to 3.
        delay: Seconds to sleep before each request. Defaults to 2.
    """
    for _ in range(rounds):
        # The URL is rebuilt on every round so each request carries a
        # fresh getRondomStr() value.
        self.url = (
            "http://r.inews.qq.com/getQQNewsUnreadList?idfa=18454932-A441-4720-8973-776284A58B7F&apptype=ios&rtAd=1&screen_height=667&network_type=wifi&"
            "store=1&activefrom=&global_info=0%7C1%7C1%7C1%7C1%7C4%7C2%7C1%7C2%7C0%7C1%7C2%7C2%7C0%7C&screen_scale=2&adcode=440112&screen_width=375&__qnr=2"
            "08e" + str(self.getRondomStr()) + "&isJailbreak=1&qqnews_refpage=QNLaunchWindowViewController&omgid=96192500048fe94e120b203d6b2be528edb2001011321e&device_model=iPhone7%2"
            "C2&pagestartFrom=icon&startFrom=icon&startarticleid=&devid=F3D6D07C-4D87-40B5-8533-A6E8D1331C89&omgbizid=1a9a8d53c482a14b7e7bd33737409def8cae006011"
            "321e&appver=9.3.1_qqnews_5.5.60"
        )
        time.sleep(delay)
        self.parse_url(self.url, self.getBody(), self.getHeader())

这是启动函数:循环请求三次接口,每次请求前等待 2 秒。

def parse_url(self, url, body, header, timeout=30):
    """POST the request and pass the decoded response text to ``parse_json``.

    Args:
        url: Endpoint to POST to.
        body: Request payload, sent as the ``data`` argument.
        header: Request headers, sent as the ``headers`` argument.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without a timeout a stalled connection would
            block the crawler indefinitely. Defaults to 30.
    """
    # NOTE(review): verify=False turns off TLS certificate verification;
    # kept unchanged from the original call.
    response = requests.post(
        url, data=body, headers=header, verify=False, timeout=timeout
    )
    self.parse_json(response.content.decode("utf-8"))

网络抓取数据

def getBody(self):
    """Return the fixed, URL-encoded form body sent with every request.

    The payload is a single constant string. It is written as adjacent
    string literals inside parentheses so the pieces are joined at compile
    time without backslash continuations.
    """
    body = (
        "cachedCount=12&adReqData=%7B%22chid%22%3A2%2C%22adtype%22%3A0%2C%22pf%22%3A%22iphone%22%2C%22launch%22%3A%220%22%2C%22ext%2"
        "2%3A%7B%22mob%22%3A%7B%22mobstr%22%3A%22Aejy45%2BNeSZw4VxymYnnIhMV%2BMEM%2B6sW9RSCUl%5C%2F9xdgy%2BxrCQ6TAqjN60uZp%5C%2FkwPlA4"
        "%2BwjM1T8AXgIGEry2mILkUiCjeN70vErvi4%5C%2FmstXaNzifAOoa7z%5C%2FtBFHEaFMuXU7nT4QWZqSqFPkQZtFndYKCzXlF0BpxSjiix7NQ55fW7kauVHkYlEI"
        "UF%2BKIlWeD42St4AymSqB7MqMT343a%2BdeUKG9Qm6YMZrmG%5C%2FWgZZZ7LiwfgC7hox%5C%2FhhCwOHmOwSbiZKRdUGgs%2ByCxL6FPLofYZdVDSABgNzm95ie%2"
        "B2Wdp25gHy%5C%2FyadnSpx6BokKCKMXOa6oZsz%5C%2FLtqRwFTPco7vKuptgpNHTn29wMeH2zfK4Kmb8Gg%2B9fTe8y3xQuC1lV81EZdDhL5QZfYkXh5%2BTcri%5C"
        "%2F6sH0i3714dC0jB475JLTIeKL5H3i3sYt8CECIpGKyexU6tUwrYl%5C%2FlL%2BGfQy15pG09WxlMBWraD1iQuS9jLwcCrRRlsOe86Y8gun1094i74NgAsud%2B9HTiMsB"
        "V4%5C%2FEItskEhTGXXV1V7Ps7yB8vzgsjWtHXyEacaUyWAnSfJDD%2BCFbwcPxNWuz%5C%2FYfeTryrTWyuMJh5mLiX2Ly9cuIuSYDciv679xopKEGDMtEC4tUq0x%2BKY"
        "QY9R8EaEDfTcPZrG6BPhIwRA9WiG0oTCOzhEiewq7FTG373fbMjpLuikxQ%5C%2FUbf7B5SAnLj%5C%2F4MsIfX5XeJAfmbsvzqG8zZbEpYwbebgjYthwoEwqgmNcO28KEISy2"
        "Z8uNW95qyuRlXokQBsVYPMb7l8isNsVZr0r9rCRFueMIlMtke6lkI1peXQNJbrhoOgqAUhcloxe7Ot%2Bqn9o0YutSr0RbWhycUV0%2Bc2DMAipZM4vtct7cMBYsVUuXP1GLBP1G"
        "TInbkGKdpRPDKl7HXaLq0Zn9Cvs59zCbJc6ND0wQXfq%2BgGTFCIAcysbbNIejC2CiRcjlyUBLdsqp4tqD6uGTX3FByULEkDrE1DO7AHE%5C%2FPqg3An7CFR0BkRh1KsCd34GWerx"
        "mB9WQIEa0tIUKZuRWkW3qZKJyo5eUieVcAI78Ul09C5JHwebRfPLQzSy1fTl4lgaKtmM2y3Lo6WY48P9PLCeQbA1lDSKw1Ku8U3wzOnmyieimQvdNAc0lEpOgykMhSfAva4lGvYGhvd"
        "M7RzTq%2BaoJh4p2ip2Oa30gojevgjc%22%7D%7D%2C%22ver%22%3A%225.5.60%22%2C%22slot%22%3A%5B%7B%22islocal%22%3A0%2C%22orders_info%22%3A%5B%22503856"
        "81%2C2120191%2C3602870493%2C19%2C101%2C110%2C1%22%2C%2250708076%2C2787691%2C2897359378%2C19%2C101%2C110%2C1%22%2C%2249679576%2C1918850882%2C28"
        "90192620%2C19%2C4307%2C110%2C1%22%2C%2250645127%2C6868108%2C4173790238%2C1000%2C705%2C110%2C2%22%5D%2C%22channel%22%3A%22news_news_top%22%2C%22r"
        "efresh_type%22%3A1%2C%22loid%22%3A%221%2C13%22%2C%22recent_rot%22%3A%5B%221%2C2%2C3%22%2C%224%22%5D%7D%5D%2C%22appversion%22%3A%22180319%22%7D&lon"
        "=113.4367974175347&uid=A6D2B510-4986-4884-8250-964B34B8FB22&chlid=news_news_top&is_new_user=0&feedbackNewsId=NEW2018042302871600%7C0%2CTWF20180423"
        "04111100%7C1%2CNEW2016111603351800%7C3%2CFIN2018042400905700%7C0%2C20180424A0FFXO00%7C0%2C20180424A0CP7I00%7C2%2C20180422A0UM4R00%7C0%2C20180423A1SOC"
        "P00%7C2%2C20180423A0PJNH00%7C2%2CHVD2017TOP000000000%7C10%2C20180422A127CW00%7C0%2C20180423A1VAON00%7C0%2C20180423A1WOUH00%7C2%2C20180423A02FOZ00%7C0"
        "%2C20180423A1VFAF00%7C0&newsTopPage=1&user_chlid=news_video_top%2Cnews_news_19%2Cnews_news_gz%2Cnews_news_ent%2Cnews_news_sports%2Cnews_news_mil&town_n"
        "ame=Unknown&addPushNews=0&lat=23.10431803385417&feedbackModulePos=%28null%29%7C3%2C10&channelPosition=0&page=2&picType=0%2C0%2C0%2C0%2C0%2C2%2C0%2C2%2C"
        "2%2C0%2C0%2C0%2C2%2C0%2C0&forward=0&adcode=440112&village_name=Unknown"
    )
    return body

网络请求需要携带的参数

def parse_json(self, jsonStr):
    """Extract ad entries from a response and store each one.

    The response text is printed, ``DataInfo.time`` is set from
    ``Util().getCurrTime()``, and the text is parsed as JSON. When the
    parsed object has an ``"adList"`` key, its value is itself decoded as
    JSON and every item of that document's ``"order"`` list is passed to
    ``self.saveDataInfo``.

    Missing keys and malformed JSON are printed and otherwise ignored, so
    one bad response does not stop the crawl.

    Args:
        jsonStr: Raw response text, expected to be a JSON document.
    """
    print(jsonStr)
    DataInfo.time = Util().getCurrTime()
    try:
        json_object = json.loads(jsonStr)
        if "adList" in json_object:
            # "adList" holds a JSON document serialized as a string, so it
            # needs a second decoding pass.
            ad_payload = json.loads(json_object["adList"])
            for json_str in ad_payload["order"]:
                self.saveDataInfo(json_str)
    except (KeyError, json.JSONDecodeError) as x:
        # JSONDecodeError is handled like KeyError: a non-JSON response
        # (e.g. an error page) is reported and skipped instead of raising.
        print(x)

解析抓取到的数据

def saveDataInfo(self, json_str):
    """Copy one ad entry into ``DataInfo`` and insert it into the database.

    Args:
        json_str: Mapping for a single ad entry. It must contain the keys
            ``"title"`` and ``"url"``; the image keys are read by
            ``self.getBitmap``.

    Raises:
        KeyError: If ``"title"`` or ``"url"`` is missing.
    """
    DataInfo.title = json_str["title"]
    DataInfo.channel = "tengxunxinwen"
    DataInfo.appdownload = json_str["url"]
    # getBitmap downloads the images and also sets DataInfo.pic_path and
    # DataInfo.source_type as a side effect.
    DataInfo.pic_list = self.getBitmap(json_str)
    DataInfo.device_type = "ios"
    DataInfo.type = 1
    # NOTE(review): the meaning of the argument 1 is not visible in this
    # file; presumably insert_inspection_list reads the fields set above.
    # Confirm against MySqlManager.
    MySqlManager().insert_inspection_list(1)

保存数据到 MySQL

def getBitmap(self, json_str):
    """Download an ad's images and return a mapping of their source URLs.

    Two layouts are handled:

    * ``"resource_url0"`` present: one image, ``DataInfo.source_type`` is
      set to 1.
    * ``"resource_urlList"`` present: a list of ``{"url": ...}`` entries,
      ``DataInfo.source_type`` is set to 2. At most the first three
      entries are used. The previous version indexed entries 0-2
      unconditionally and raised ``IndexError`` on shorter lists.

    Each image is saved through ``Util().save_img`` under
    ``pic<N>_<timestamp>.jpg`` in ``self.path``, and ``DataInfo.pic_path``
    is set to a dict mapping ``pic_path<N>`` to ``self.path`` plus that
    file name.

    Args:
        json_str: Mapping for a single ad entry.

    Returns:
        Dict mapping ``pic<N>`` to the image's source URL. It is empty when
        neither image key is present.
    """
    file_path = self.path
    # One microsecond-resolution timestamp is shared by all images of this
    # ad; the picN_ prefix keeps the individual file names distinct.
    filename = str(int(time.time() * 1000000)) + ".jpg"
    bitmap = {}
    bitmap_path = {}

    if "resource_url0" in json_str:
        DataInfo.source_type = 1
        image_urls = [json_str["resource_url0"]]
    elif "resource_urlList" in json_str:
        DataInfo.source_type = 2
        image_urls = [entry["url"] for entry in json_str["resource_urlList"][:3]]
    else:
        # NOTE(review): as reconstructed from the collapsed original,
        # DataInfo.pic_path and DataInfo.source_type keep their previous
        # values in this case. Confirm that is intended.
        return bitmap

    for position, image_url in enumerate(image_urls, start=1):
        local_name = "pic%d_" % position + filename
        bitmap["pic%d" % position] = image_url
        bitmap_path["pic_path%d" % position] = file_path + local_name
        Util().save_img(image_url, local_name, file_path)

    DataInfo.pic_path = bitmap_path
    return bitmap

保存抓取到的图片到本地

Python3网络爬虫:腾讯新闻App的广告数据抓取相关推荐

  1. Python3网络爬虫:网易新闻App的广告数据抓取

    咱们就不说废话了,直接上完整的源码 def startGetData(self):self.url = "https://nex.163.com/q" body = self.ge ...

  2. Python3网络爬虫:今日头条新闻App的广告数据抓取

    咱们就不说废话了,直接上完整的源码 def startGetData(self):ret = random.randint(2, 10)index = 0 url = "" whi ...

  3. 手写网络协议栈-协议封装,netmap,dpdk网卡数据抓取,柔性数组

    今夜只有一个话题,手写网络协议栈,保证大家都能学会 1. 协议头的封装 2. netmap/dpdk的原理 3. 柔性数组的使用 视频讲解如下,点击观看: 手写网络协议栈-协议封装,netmap,dp ...

  4. Android新闻阅读器(数据抓取)

    第一篇技术博客,写得不好请见谅,谢谢(^_^) 由于最近师弟师妹们学习Android的需求,于是就写了此篇博客并且与各位分享一下. 整篇博客总共分为两部分. 第一部分搭建一个新闻列表界面(ListVi ...

  5. 企查查app新增企业数据抓取

    企查查每日新增企业数据抓取 尚未完成的工作: 需要自行抓包获取设备id,appid,sign等等 sign和时间戳保持一致即可 把所有的数据库.redis配置 无法自动登录,账号需要独立 redis数 ...

  6. 转:【Python3网络爬虫开发实战】6.4-分析Ajax爬取今日头条街拍美图

    [摘要] 本节中,我们以今日头条为例来尝试通过分析Ajax请求来抓取网页数据的方法.这次要抓取的目标是今日头条的街拍美图,抓取完成之后,将每组图片分文件夹下载到本地并保存下来. 1. 准备工作 在本节 ...

  7. 【Python3网络爬虫开发实战】6.4-分析Ajax爬取今日头条街拍美图

    [摘要] 本节中,我们以今日头条为例来尝试通过分析Ajax请求来抓取网页数据的方法.这次要抓取的目标是今日头条的街拍美图,抓取完成之后,将每组图片分文件夹下载到本地并保存下来. 1. 准备工作 在本节 ...

  8. python爬虫今日头条街拍美图开发背景_【Python3网络爬虫开发实战】6.4-分析Ajax爬取今日头条街拍美图...

    [摘要] 本节中,我们以今日头条为例来尝试通过分析Ajax请求来抓取网页数据的方法.这次要抓取的目标是今日头条的街拍美图,抓取完成之后,将每组图片分文件夹下载到本地并保存下来. 1. 准备工作 在本节 ...

  9. python网络爬虫学习笔记(7)动态网页抓取(二)实践

    文章目录 1 资料 2 笔记 2-1 准备 2-1-1. 网址 2-2-2 文本位置 2-2 代码 2-2-1 原型 2-2-2 ver0.1 1 资料 <Python网络爬虫从入门到实践> ...

最新文章

  1. 003小插曲之变量和字符串
  2. Android中对Handler用法的总结
  3. java 中文分词 比较_中文分词工具评估:chinese-segmentation-evaluation
  4. 2021高考萧山二中成绩查询,萧山中学2018高考成绩
  5. MapReduce论文中文版
  6. ubuntu远程登陆windows
  7. 【学术相关】如何找到研究的突破点?
  8. CentOS6.5 搭建Open***服务器
  9. java创建读取文件_Java实现文件的创建、读取、写入操作-Fun言
  10. 真香?小米9价格将上4000元!战斗天使真机长这样...
  11. 儿童学python下哪个软件好_少儿学编程适合哪个软件
  12. 【扫盲系列】网络术语
  13. 互联网晚报 | 7月16日 星期六 |抖音回应“外卖功能上线”;上半年国内生产总值同比增长2.5%;雷军8月将携小米工程样车亮相...
  14. python将姓王的都改成老王_全域明星-第46章:校长姓王,隔壁老王?-爱阅小说网...
  15. CRM IFD部署更换证书 - adfs证书更换
  16. 微信h5禁止分享和复制链接 超简单
  17. 小白学习cartopy画地图的第六天
  18. crc可以检出奇数个错误_CRC码检错能力的一个问题
  19. 美杜莎网站是哪个服务器,美杜莎云服务器
  20. Python之路点燃编程圈:源于不爽C语言,单枪匹马搞副业,如今吞噬世界(附链接)...

热门文章

  1. [014量化交易] python 通过tushare 获取股票数据、名称、股票代码、指定股票名
  2. 危机公关处理能力不可或缺的是企业成长能力
  3. OBB包围盒及其碰撞检测算法(一)
  4. java double 类型_关于Java中的double类型数据
  5. VLDB论文解读:阿里云超大规模实时分析型数据库AnalyticDB
  6. 荔枝派Zero小白(一)
  7. 别说华为语音助手不智能了,这3大隐藏功能都知道吗?实用又贴心
  8. 《信息化项目文档模板四——系统建设方案模板》
  9. SpringBoot项目的云服务器部署
  10. win10用html文件做壁纸,利用win10自带工具制作动态壁纸的简单方法