# 1. Import third-party libraries

import functools
import execjs
import traceback
from urllib.parse import quote_plus
import requests, json, time, datetime, random, re
from urllib.parse import quote
from user_check_proxy import Proxy_start
from logs import logDebug, logInfo
#代理自己加上，或者不加代理
from user_check_proxy import get_proxy2

#过客网支持淘宝、天猫、京东、苏宁、当当、网易考拉、亚马逊等商品网址

import warnings
warnings.filterwarnings('ignore')

# 2. Mobile (H5) User-Agent

def random_h5_ua():
    """Return a randomly chosen mobile (H5) User-Agent string.

    The pool holds five Android / WeChat in-app browser User-Agents; one is
    picked uniformly at random on every call.

    Returns:
        str: one User-Agent header value.
    """
    h5_user_agent = [
        'Mozilla/5.0 (Linux; Android 5.1; OPPO A37m Build/LMY47I; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/4G Language/zh_CN',
        'Mozilla/5.0 (Linux; Android 7.1.1; OPPO R11 Build/NMF26X; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/4G Language/zh_CN',
        'Mozilla/5.0 (Linux; Android 5.1.1; OPPO R9 Plusm A Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN',
        'Mozilla/5.0 (Linux; Android 7.1.1; OPPO R11 Build/NMF26X; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN',
        'Mozilla/5.0 (Linux; Android 7.1.1; OPPO R11 Pluskt Build/NMF26X; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.5.23.1180 NetType/WIFI Language/zh_CN',
    ]
    return random.choice(h5_user_agent)

# 3. Desktop (PC) User-Agent

def random_web_ua():
    """Return a randomly chosen desktop browser User-Agent string.

    The pool holds six Windows 10 browser User-Agents (Chrome, Firefox,
    Edge and a Sogou-flavoured Chrome); one is picked uniformly at random
    on every call.

    Returns:
        str: one User-Agent header value.
    """
    web_user_agent = [
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
    ]
    return random.choice(web_user_agent)

# 4. Helper that builds the list of the previous 30 days

# Returns the 30 dates before today (today itself excluded) as a list

def days_ago(days=30):
    """Return the calendar dates of the ``days`` days preceding today.

    Today itself is not included. Dates are computed from the local
    calendar date.

    Args:
        days: how many days to look back; defaults to 30.

    Returns:
        list[str]: ``'YYYY-MM-DD'`` strings in ascending order, starting
        ``days`` days ago and ending with yesterday.
    """
    today = datetime.date.today()
    return [
        (today - datetime.timedelta(days=offset)).isoformat()
        for offset in range(days, 0, -1)
    ]

# 5. Timestamp conversion

def get_timestamp_str(timestamp):
    """Format a Unix timestamp as a ``'YYYY-MM-DD'`` date string.

    The conversion goes through ``time.localtime``, so the resulting date
    depends on the local timezone of the machine running the code.

    Args:
        timestamp: seconds since the epoch.

    Returns:
        str: the local calendar date of ``timestamp``.
    """
    return time.strftime('%Y-%m-%d', time.localtime(timestamp))

def get_guoke_price_web(item_url):
    """Fetch the raw price-history response for a product from tool168.cn.

    Three requests are issued in sequence:

    1. GET the history search page and scrape the hidden ``checkCodeId``
       value out of the HTML.
    2. POST that ``checkCode`` together with the product URL to
       ``/dm/ptinfo.php`` and read ``code`` from the JSON reply.
    3. POST ``code`` to ``/dm/history.php`` and return the response text.

    Args:
        item_url: product page URL to look up.

    Returns:
        str | None: the stripped text of the third response, or ``None``
        when the page has no ``checkCodeId``, the JSON reply has no
        ``code``, or the site answers that the product was not found.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if the second response is not valid JSON.
    """
    # Proxy lookup is project-local: plug in your own proxy pool / cloud
    # proxy here. To run without a proxy, drop this line and the
    # ``proxies`` entry below.
    # NOTE(review): presumably returns a requests-style proxies mapping —
    # confirm against user_check_proxy.get_proxy2.
    proxies = get_proxy2()
    request_options = {'proxies': proxies, 'verify': False, 'timeout': 20}

    ua = random_web_ua()
    # Quoted forms are only for hand-built URLs (the Referer headers).
    # Values handed to ``params=`` stay raw because requests URL-encodes
    # them itself; pre-quoting them would double-encode the query string.
    quoted_item_url = quote_plus(item_url)
    search_label = '搜索'
    quoted_search_label = quote_plus(search_label)

    # 6. Start requesting the URLs
    # Step 1: load the search page to obtain the one-off checkCode.
    search_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'www.tool168.cn',
        'Referer': 'http://www.tool168.cn/history/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
    }
    search_params = {
        'm': 'history',
        'a': 'view',
        'k': item_url,
        'btnSearch': search_label,
    }
    search_response = requests.get(
        url='http://www.tool168.cn/?',
        headers=search_headers,
        params=search_params,
        **request_options
    )
    check_code_match = re.search(
        'id="checkCodeId" value="(.*?)"', search_response.text
    )
    if check_code_match is None:
        # Page layout changed or the request was blocked: nothing to query.
        return None
    check_code = check_code_match.group(1)

    # Step 2: exchange checkCode + product URL for the history "code".
    # No Content-Length header is set here: requests derives it from the
    # encoded form body.
    info_headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'www.tool168.cn',
        'Origin': 'http://www.tool168.cn',
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k={}&btnSearch={}'.format(
            quoted_item_url, quoted_search_label
        ),
        'User-Agent': ua,
        'X-Requested-With': 'XMLHttpRequest',
    }
    info_response = requests.post(
        url='http://www.tool168.cn/dm/ptinfo.php',
        headers=info_headers,
        data={'checkCode': check_code, 'con': item_url},
        **request_options
    )
    info_payload = json.loads(info_response.text)
    code = info_payload.get('code') if isinstance(info_payload, dict) else None
    if not code:
        return None

    # Step 3: fetch the price-history payload for that code.
    history_headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Host': 'www.tool168.cn',
        'Origin': 'http://www.tool168.cn',
        # URL-quoted, matching the Referer used for the second request.
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k={}'.format(quoted_item_url),
        'User-Agent': ua,
        'X-Requested-With': 'XMLHttpRequest',
    }
    history_response = requests.post(
        url='http://www.tool168.cn/dm/history.php?',
        headers=history_headers,
        params={'code': code, 't': ''},
        **request_options
    )
    history_response.encoding = 'utf-8'
    result_response = history_response.text.strip()

    # The site answers with this sentence when the product is not indexed.
    if "对不起，没有找到。" in result_response:
        return None
    return result_response

# 7. Parse dates and historical prices

def parse(result_history_price, start_date=None):
    """Turn raw chart entries into a ``{date: price_in_cents}`` mapping.

    Each entry is expected to look like ``"[(2019,0,12),199.00]"``: a
    parenthesised ``year,month,day`` triple (month is zero-based, so 1 is
    added) followed by the price. Only the window starting at
    ``start_date`` is returned:

    * entries dated on or after ``start_date`` are kept as they are;
    * if none qualify, the last known price is reported at ``start_date``;
    * if older entries exist and ``start_date`` itself has no entry, the
      price of the entry just before ``start_date`` is prepended at
      ``start_date`` so the window starts with a known price.

    Args:
        result_history_price: iterable of raw entry strings, assumed to be
            in chronological order.
        start_date: first day of the window as ``'YYYY-MM-DD'``. Defaults
            to ``days_ago()[0]`` (30 days before today).

    Returns:
        dict[str, int]: ``'YYYY-MM-DD'`` -> price in cents, rounded to the
        nearest cent. Empty when ``result_history_price`` is empty.

    Raises:
        ValueError: if an entry does not match the expected shape or its
            date/price fields are not numeric.
    """
    if start_date is None:
        start_date = days_ago()[0]

    def date_key(iso_date):
        # 'YYYY-MM-DD' -> YYYYMMDD integer, used for ordering comparisons.
        return int(iso_date.replace('-', ''))

    # Full price history as (iso_date, price_text) pairs.
    history = []
    for entry in result_history_price:
        date_match = re.search(r'\((.*?)\)', entry)
        price_match = re.search(r'\),(.*?)]', entry)
        if date_match is None or price_match is None:
            raise ValueError(f"malformed price-history entry: {entry!r}")
        year, month, day = date_match.group(1).split(",")[:3]
        # Months arrive zero-based (JavaScript Date.UTC convention).
        iso_date = f"{int(year):04d}-{int(month) + 1:02d}-{int(day):02d}"
        history.append((iso_date, price_match.group(1)))

    if not history:
        return {}

    # 8. Select the prices that fall inside the 30-day window
    start_key = date_key(start_date)
    recent = [item for item in history if date_key(item[0]) >= start_key]

    if not recent:
        # No change inside the window: the price is still the last one seen.
        recent = [(start_date, history[-1][1])]
    elif len(history) > len(recent) and all(
        date_key(day_text) != start_key for day_text, _ in recent
    ):
        # Seed the window's first day with the price in force just before it.
        keys = [date_key(day_text) for day_text, _ in history]
        for index in range(len(keys) - 1):
            if keys[index] < start_key < keys[index + 1]:
                recent.insert(0, (start_date, history[index][1]))
                break

    # round() rather than int(): float("19.99") * 100 is 1998.9999999999998,
    # which int() would truncate to 1998.
    return {day_text: round(float(price) * 100) for day_text, price in recent}

def gkw_history_prices(item_url):
    """Return the last-30-days price history for a product URL.

    Fetches the raw response with ``get_guoke_price_web``, extracts the
    data series passed to the page's ``chart("...")`` call and hands the
    individual entries to ``parse``.

    Args:
        item_url: product page URL to look up.

    Returns:
        dict | None: the mapping produced by ``parse``, or ``None`` when
        the request fails, the product is not found, or the response
        cannot be parsed.
    """
    try:
        result = get_guoke_price_web(item_url)
    except Exception:
        # Best-effort lookup: any request failure means "no data".
        return None
    if not result:
        return None

    chart_match = re.search(r'chart\("(.*?)".*\);', result, re.S)
    if chart_match is None:
        return None

    # Split "[...],[...]" into single entries: mark each boundary with a
    # full-width comma first, because a plain comma also occurs inside
    # every entry. "Date.UTC" is dropped so only "(y,m,d)" remains.
    result_history_price = (
        chart_match.group(1)
        .replace("],[", "]，[")
        .replace("Date.UTC", "")
        .split("，")
    )
    try:
        return parse(result_history_price)
    except Exception:
        # Unexpected entry format: treat as "no data".
        return None

if __name__ == '__main__':
    # Put a product URL from any supported shopping platform here.
    item_url = "https://item.jd.com/5475614.html"
    print(gkw_history_prices(item_url))

实战各大平台商品比价--Python 爬取过客网商品历史价格(30天)相关推荐

python爬取当当网商品评论
python爬取当当网商品评论本案例获取某鞋评论作为例案例目的: 通过爬取当当网商品评价,介绍通过结合jsonpath和正则表达式获取目标数据的方法. 代码功能: 输入爬取的页数,自动下载保存每页 ...
用python爬取网站_「自如网」关于用python爬取自如网信息的价格问题(已解决) - seo实验室...
自如网 ###这是一篇求助文,我能获取图片并变成字符串,但是无法获取位移量### 前两坛突发奇想想要爬取自如网的租房数据,本来以为能够请求+美丽+ re能全部搞定,没想到这个网站的反爬机制有点让我搞不 ...
关于用python爬取自如网信息的价格问题(已解决)
###这是一篇求助文,我能获取图片并变成字符串,但是无法获取位移量### 前两坛突发奇想想要爬取自如网的租房数据,本来以为能够请求+美丽+ re能全部搞定,没想到这个网站的反爬机制有点让我搞不定先贴个 ...
Python 爬取淘宝商品的价格并保存到本地excel文件中
刚学Python爬虫没多久,老想着爬点什么.哈哈,刚好前段时间双11,就把淘宝爬了下. 不知道为什么,上次对淘宝进行页面读取不需要cookie就可以获取一些信息.现在需要cookie才能过去.话不多说 ...
python 知乎美女_知乎大神用Python爬取高颜值美女（爬虫+人脸检测+颜值检测）
原标题:知乎大神用Python爬取高颜值美女(爬虫+人脸检测+颜值检测) 1 数据源知乎话题『美女』下所有问题中回答所出现的图片 2 抓取工具 Python 3,并使用第三方库 Requests.l ...
爬取电商平台数据，python爬取某维商品数据
本次内容: 爬取电商平台数据,python爬取某维商品数据课程亮点动态数据抓包演示 json数据解析 requests模块的使用保存csv 环境介绍 python 3.8 [最好用和老师一样的版 ...
Python爬取京东任意商品数据实战总结
利用Python爬取京东任意商品数据今天给大家展示爬取京东商品数据首先呢还是要分思路的,我分为以下几个步骤: 第一步:得到搜索指定商的url 第二步:获得搜索商品列表信息第三步:对得到的商品数据 ...
Python爬取书包网文章实战总结
python爬取书包网文章总结今天闲来无事去看小说,但是发现没办法直接下载,所以呢就用python爬虫来下载一波了,哈哈- 爬取的是这篇小说:剑破九天(是不是很霸气,话不多说,开始-) 总体思路步骤 ...
用python爬取基金网信息数据，保存到表格，并做成四种简单可视化。（爬虫之路，永无止境！）
用python爬取基金网信息数据,保存到表格,并做成四种简单可视化.(爬虫之路,永无止境!) 上次 2021-07-07写的用python爬取腾讯招聘网岗位信息保存到表格,并做成简单可视化. 有的人留 ...

实战各大平台商品比价--Python 爬取过客网商品历史价格(30天)