Crawling Douguo Recipes with Python multithreading and proxies

  • Crawler code
  • Adding proxies
    • Crawling through the proxies

Crawler code

This approach still worked when I last tested it on 2019-06-18.

import json
from multiprocessing import Queue
from concurrent.futures import ThreadPoolExecutor

import requests

from save_mongo import mongo_info

queues_list = Queue()


def handle_request(url, data):
    # Request headers captured from the Douguo Android app (device fingerprint etc.).
    header = {
        "client": "4",
        "version": "6940.2",
        "device": "HUAWEI MLA-AL10",
        "sdk": "22,5.1.1",
        "imei": "863064011228246",
        "channel": "baidu",
        "mac": "E4:F8:9C:F7:4F:22",
        "resolution": "1280*720",
        "dpi": "1.5",
        "android-id": "1e4f89cf74f22378",
        "pseudo-id": "9cf74f223781e4f8",
        "brand": "HUAWEI",
        "scale": "1.5",
        "timezone": "28800",
        "language": "zh",
        "cns": "3",
        "carrier": "CHINA+MOBILE",
        "imsi": "460071228248156",
        "User-Agent": "Mozilla/5.0 (Linux; Android 5.1.1; HUAWEI MLA-AL10 Build/HUAWEIMLA-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36",
        "act-code": "e9d3a060cf2741ba937adda1c9f03fa2",
        "act-timestamp": "1558788732",
        # "uuid": "5dd43ba9-e5ce-44a6-9766-9df287e8fe83",
        "reach": "10000",
        "newbie": "0",
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "Keep-Alive",
        # "Cookie": "duid=59950651",
        "Host": "api.douguo.net",
        "Content-Length": "96",
    }
    response = requests.post(url=url, headers=header, data=data)
    return response


def handle_index():
    # Fetch the flat catalog of categories and queue one search payload per ingredient.
    url = "http://api.douguo.net/recipe/flatcatalogs"
    data = {
        "client": "4",
        "_session": "1560752474235863064011228246",
        "keyword": "%E5%9C%9F%E8%B1%86",
        "order": "0",
        "_vs": "400",
        "type": "0",
    }
    response = handle_request(url, data)
    for data_list in json.loads(response.text)["result"]["cs"]:
        for names in data_list["cs"]:
            for name in names["cs"]:
                data2 = {
                    "client": "4",
                    # "_session": "1560752474235863064011228246",
                    "keyword": name["name"],
                    "order": "0",
                    "_vs": "400",
                    "type": "0",
                }
                queues_list.put(data2)


def handle_shicai_content(data):
    print("Processing ingredient:", data["keyword"])
    # Fetch the first 120 search results (6 pages of 20).
    for i in range(6):
        shicai_url = "http://api.douguo.net/recipe/v2/search/{}/20".format(i * 20)
        shicai = handle_request(shicai_url, data)
        for item in json.loads(shicai.text)["result"]["list"]:
            shicai_info = {}
            shicai_info["name"] = data["keyword"]
            if item["type"] == 13:
                shicai_info["user_name"] = item["r"]["an"]
                shicai_info["caipu_id"] = item["r"]["id"]
                shicai_info["caipu_name"] = item["r"]["n"]
                shicai_info["context"] = item["r"]["cookstory"].replace("\n", "").replace(" ", "")
                shicai_info["shicai"] = item["r"]["major"]
                detail_url = "http://api.douguo.net/recipe/detail/" + str(item["r"]["id"])
                data3 = {
                    "client": "4",
                    # "_session": "1560771406377863064011228246",
                    "author_id": "0",
                    "_vs": "5900",
                    "_ext": '{"query":{"kw":"' + shicai_info["name"] + '","src":"2801","idx":"1","type":"13","id":"' + str(shicai_info["caipu_id"]) + '"}}',
                }
                response = handle_request(detail_url, data3)
                detail = json.loads(response.text)
                shicai_info["tips"] = detail["result"]["recipe"]["tips"]
                shicai_info["buzhu"] = detail["result"]["recipe"]["cookstep"]
                mongo_info.insert_item(shicai_info)
                print("Saved recipe:", shicai_info["caipu_name"])


handle_index()
pool = ThreadPoolExecutor(max_workers=20)
while queues_list.qsize() > 0:
    pool.submit(handle_shicai_content, queues_list.get())
    # handle_shicai_content(queues_list.get())
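The final loop polls the queue's size from the main thread, which only works here because every task is queued before the pool starts consuming. A minimal sketch of a sturdier pattern is to submit each task and wait on the returned futures; the `process` function and keyword list below are made up for illustration:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from queue import Queue

# Hypothetical stand-in for the real per-ingredient worker: just echoes the keyword.
def process(task):
    return task["keyword"]

tasks = Queue()
for kw in ["土豆", "茄子", "豆腐"]:
    tasks.put({"keyword": kw})

results = []
with ThreadPoolExecutor(max_workers=20) as pool:
    # Drain the queue once, up front, in the main thread; each submit returns a Future.
    futures = [pool.submit(process, tasks.get()) for _ in range(tasks.qsize())]
    # as_completed yields each future as its worker finishes.
    for fut in as_completed(futures):
        results.append(fut.result())

print(sorted(results))
```

Waiting on futures also surfaces worker exceptions, which `pool.submit` in a fire-and-forget loop silently swallows.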

Adding proxies

Use proxy IPs you collected yourself. I described a way to scrape proxy IPs in this blog post:

https://blog.csdn.net/qq_40423339/article/details/92759849

Crawling through the proxies

import json
import random
from multiprocessing import Queue
from concurrent.futures import ThreadPoolExecutor

import requests

from save_mongo import mongo_info

queues_list = Queue()


def handle_request(url, data):
    # Request headers captured from the Douguo Android app (device fingerprint etc.).
    header = {
        "client": "4",
        "version": "6940.2",
        "device": "HUAWEI MLA-AL10",
        "sdk": "22,5.1.1",
        "imei": "863064011228246",
        "channel": "baidu",
        "mac": "E4:F8:9C:F7:4F:22",
        "resolution": "1280*720",
        "dpi": "1.5",
        "android-id": "1e4f89cf74f22378",
        "pseudo-id": "9cf74f223781e4f8",
        "brand": "HUAWEI",
        "scale": "1.5",
        "timezone": "28800",
        "language": "zh",
        "cns": "3",
        "carrier": "CHINA+MOBILE",
        "imsi": "460071228248156",
        "User-Agent": "Mozilla/5.0 (Linux; Android 5.1.1; HUAWEI MLA-AL10 Build/HUAWEIMLA-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36",
        "act-code": "e9d3a060cf2741ba937adda1c9f03fa2",
        "act-timestamp": "1558788732",
        # "uuid": "5dd43ba9-e5ce-44a6-9766-9df287e8fe83",
        "reach": "10000",
        "newbie": "0",
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "Keep-Alive",
        # "Cookie": "duid=59950651",
        "Host": "api.douguo.net",
        "Content-Length": "96",
    }
    # Load the verified proxies scraped earlier, one JSON object per line.
    with open("verified_proxies.json", "r") as f:
        ip_list = f.read()
    ip_list = ip_list.split("\n")
    # Alternative: pick a random proxy each attempt.
    # while True:
    #     ip_json = json.loads(ip_list[random.randint(0, len(ip_list) - 1)])
    #     proxy = {}
    #     proxy[ip_json["type"]] = ip_json["host"] + ":" + str(ip_json["port"])
    #     print(proxy)
    #     try:
    #         response = requests.post(url=url, headers=header, data=data, proxies=proxy)
    #     except Exception:
    #         pass
    #     else:
    #         if response.status_code == 200:
    #             break
    # Walk the proxy list in order until one request succeeds.
    i = 0
    while True:
        ip_json = json.loads(ip_list[i])
        proxy = {}
        proxy[ip_json["type"]] = ip_json["host"] + ":" + str(ip_json["port"])
        print(proxy)
        try:
            response = requests.post(url=url, headers=header, data=data, proxies=proxy)
        except Exception:
            i += 1  # proxy failed, try the next one
        else:
            if response.status_code == 200:
                break
            else:
                i += 1
    return response


def handle_index():
    # Fetch the flat catalog of categories and queue one search payload per ingredient.
    url = "http://api.douguo.net/recipe/flatcatalogs"
    data = {
        "client": "4",
        "_session": "1560752474235863064011228246",
        "keyword": "%E5%9C%9F%E8%B1%86",
        "order": "0",
        "_vs": "400",
        "type": "0",
    }
    response = handle_request(url, data)
    for data_list in json.loads(response.text)["result"]["cs"]:
        for names in data_list["cs"]:
            for name in names["cs"]:
                data2 = {
                    "client": "4",
                    # "_session": "1560752474235863064011228246",
                    "keyword": name["name"],
                    "order": "0",
                    "_vs": "400",
                    "type": "0",
                }
                queues_list.put(data2)


def handle_shicai_content(data):
    print("Processing ingredient:", data["keyword"])
    # Fetch the first 120 search results (6 pages of 20).
    for i in range(6):
        shicai_url = "http://api.douguo.net/recipe/v2/search/{}/20".format(i * 20)
        shicai = handle_request(shicai_url, data)
        for item in json.loads(shicai.text)["result"]["list"]:
            shicai_info = {}
            shicai_info["name"] = data["keyword"]
            if item["type"] == 13:
                shicai_info["user_name"] = item["r"]["an"]
                shicai_info["caipu_id"] = item["r"]["id"]
                shicai_info["caipu_name"] = item["r"]["n"]
                shicai_info["context"] = item["r"]["cookstory"].replace("\n", "").replace(" ", "")
                shicai_info["shicai"] = item["r"]["major"]
                detail_url = "http://api.douguo.net/recipe/detail/" + str(item["r"]["id"])
                data3 = {
                    "client": "4",
                    # "_session": "1560771406377863064011228246",
                    "author_id": "0",
                    "_vs": "5900",
                    "_ext": '{"query":{"kw":"' + shicai_info["name"] + '","src":"2801","idx":"1","type":"13","id":"' + str(shicai_info["caipu_id"]) + '"}}',
                }
                response = handle_request(detail_url, data3)
                detail = json.loads(response.text)
                shicai_info["tips"] = detail["result"]["recipe"]["tips"]
                shicai_info["buzhu"] = detail["result"]["recipe"]["cookstep"]
                # mongo_info.insert_item(shicai_info)
                print("Saved recipe:", shicai_info["caipu_name"])


handle_index()
pool = ThreadPoolExecutor(max_workers=20)
while queues_list.qsize() > 0:
    pool.submit(handle_shicai_content, queues_list.get())
    # handle_shicai_content(queues_list.get())
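The proxy code above expects verified_proxies.json to hold one JSON object per line with `type`, `host`, and `port` keys. A minimal sketch of parsing that format into the dict shape `requests` expects for its `proxies` argument (the addresses below are made up for illustration):

```python
import json

# Two example lines in the crawler's expected format (values are fabricated).
raw = "\n".join([
    '{"type": "http", "host": "1.2.3.4", "port": 8080}',
    '{"type": "https", "host": "5.6.7.8", "port": 3128}',
])

proxies = []
for line in raw.split("\n"):
    if not line.strip():
        continue  # skip blank lines (the file often ends with one)
    ip_json = json.loads(line)
    # requests expects {"http": "host:port"} or {"https": "host:port"}.
    proxies.append({ip_json["type"]: ip_json["host"] + ":" + str(ip_json["port"])})

print(proxies[0])
```

Parsing each line lazily, as the original does, also tolerates a trailing blank line only if you guard against it like this; indexing past the list end raises IndexError when every proxy has failed.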
