雪球网交易数据爬取,Python 源码。
雪球是一个面向投资者的社交网络平台,下面的代码用于爬取其组合的调仓(交易)数据。

代码:

def get_trade_behavior(uid, timeout=20):
    """Download the rebalancing history of the Xueqiu portfolio ``SP<uid>``.

    The history endpoint is paged (20 records per page).  Page 1 is fetched
    first to learn ``maxPage``; the remaining pages are then fetched one by
    one, with a random pause of up to 3 seconds after every request.

    Args:
        uid: Portfolio number as a string (it is concatenated into the URL).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        On success, a list with one row per rebalancing record:
        ``[uid, trade_time, stock_name, stock_symbol, prev_weight_adjusted,
        target_weight, price]``.  Only the first entry of each record's
        ``rebalancing_histories`` is used.  ``trade_time`` is formatted in
        local time as ``YYYY-mm-dd HH:MM:SS``.
        On any failure (network error, unexpected JSON, missing key, ...)
        the two-element marker ``[uid, "异常"]`` is returned instead.
    """
    import random
    import time

    import requests

    accept_html = 'text/html;q=0.9,*/*;q=0.8'
    accept_json = 'application/json, text/plain, */*'
    # (User-Agent, Accept) pairs; one is picked at random for every request.
    browser_profiles = [
        ("Mozilla/5.0 (X11; CrOS x86_64 10066.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", accept_html),
        ("Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/69.0.3497.100 Mobile/13B143 Safari/601.1.46", accept_html),
        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A", accept_html),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0", accept_json),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", accept_json),
    ]
    headers = [
        {
            'User-Agent': user_agent,
            'Accept': accept,
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Connection': 'close',
        }
        for user_agent, accept in browser_profiles
    ]

    def fetch_page(session, page):
        """Return the decoded JSON of one history page, then pause briefly."""
        url = ("https://xueqiu.com/service/tc/snowx/PAMID/cubes/rebalancing/history?cube_symbol=SP"
               + uid + "&count=20&page=" + str(page))
        payload = session.get(
            url,
            headers=random.choice(headers),
            stream=True,
            allow_redirects=False,
            timeout=timeout,
        ).json()
        time.sleep(random.random() * 3)
        return payload

    result = []
    s = requests.Session()
    # NOTE(review): `keep_alive` is kept from the original code; it does not
    # appear to be a documented Session attribute, and the
    # 'Connection: close' header above already asks the server to close.
    s.keep_alive = False
    try:
        first_page = fetch_page(s, 1)
        maxpage = first_page["maxPage"]
        for k in range(1, maxpage + 1):
            print("正在检索{%s}-第%d页-总共%d页" % (uid, k, maxpage))
            # Page 1 was already downloaded to read maxPage; reuse it.
            page = first_page if k == 1 else fetch_page(s, k)
            for record in page["list"]:
                history = record["rebalancing_histories"][0]
                # `updated_at` is divided by 1000 before conversion
                # (presumably a millisecond epoch timestamp).
                seconds = int(round(record["updated_at"]) / 1000)
                trade_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))
                result.append([
                    uid,
                    trade_time,
                    history["stock_name"],
                    history["stock_symbol"],
                    history["prev_weight_adjusted"],
                    history["target_weight"],
                    history["price"],
                ])
        print("{%s} 检索完毕!" % uid)
        return result
    except Exception:
        # Best-effort per-portfolio boundary: any failure marks this uid as
        # failed.  `Exception` (not a bare except) lets Ctrl-C / SystemExit
        # still stop the crawl.
        print("{%s} 异常!" % uid)
        return [uid, "异常"]
    finally:
        s.close()


def read_csv(name, directory="C:\\Users\\viemax\\Desktop\\", encoding=None):
    """Read ``<directory>/<name>.csv`` and return its rows.

    Args:
        name: File name without the ``.csv`` extension.
        directory: Folder containing the file; defaults to the desktop path
            the original script hard-coded.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default, as before.

    Returns:
        A list of rows, each row being a list of strings.
    """
    import csv
    import os

    path = os.path.join(directory, name + ".csv")
    # newline="" is what the csv module expects for files it reads.
    with open(path, "r", newline="", encoding=encoding) as csv_file:
        return list(csv.reader(csv_file))


no_data_id = read_csv("no_data_id")
obj = []
# Collect the second column of every CSV row after the first two rows.
obj.extend(row[1] for row in no_data_id[2:])

# Crawl every other collected id (indices 0, 2, 4, ...).
# NOTE(review): the reason for skipping the odd indices is not visible here.
res = []
for portfolio_id in obj[::2]:
    res.append(get_trade_behavior(portfolio_id))
def _cookie_header_to_dict(header):
    """Split a raw ``Cookie:`` header string into a ``{name: value}`` dict."""
    pairs = (part.strip().partition("=") for part in header.split(";"))
    return {name: value for name, _, value in pairs if name}


def xueqiu(num):
    """Fetch the Xueqiu portfolio page ``SP<num>`` and report creator/status.

    The page is requested with a randomly chosen browser header set and a
    randomly chosen logged-in cookie set, followed by a pause of roughly
    8 to 11 seconds.

    Args:
        num: Portfolio number as a string (it is concatenated into the URL).

    Returns:
        A three-element list, which is also printed:

        * ``[num, creator, status]`` when the creator link is found.
          ``creator`` is the link's ``href`` without its first character;
          ``status`` is the text of the ``cube-closed`` element, or
          ``"未关停!"`` when the page has no such element.
        * ``[num, "timeout", "timeout"]`` when the request timed out.
        * ``[num, "NaN", title]`` when there is no creator link; ``title``
          is the page title (``"404_雪球"`` for a missing portfolio).
        * ``[num, "AttributeError", "page_error"]`` when the page has
          neither a creator link nor a ``<title>``.
    """
    import random
    import time

    import requests
    from bs4 import BeautifulSoup

    url = u"https://xueqiu.com/P/SP" + num

    accept_html = 'text/html;q=0.9,*/*;q=0.8'
    accept_json = 'application/json, text/plain, */*'
    # (User-Agent, Accept) pairs; one is picked at random for the request.
    browser_profiles = [
        ("Mozilla/5.0 (X11; CrOS x86_64 10066.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", accept_html),
        ("Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/69.0.3497.100 Mobile/13B143 Safari/601.1.46", accept_html),
        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A", accept_html),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0", accept_json),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", accept_json),
    ]
    headers = [
        {
            'User-Agent': user_agent,
            'Accept': accept,
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Connection': 'close',
        }
        for user_agent, accept in browser_profiles
    ]

    # SECURITY: these are hard-coded logged-in session cookies (they carry
    # xq_a_token / xq_r_token values).  They are kept so the function's
    # behavior does not change, but credentials should be loaded from
    # configuration rather than committed to source.
    cookie_headers = [
        "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; _gat_gtag_UA_16079156_4=1; xq_a_token=18b7f7dec4f54032863219716eaf839ee940199d; xqat=18b7f7dec4f54032863219716eaf839ee940199d; xq_r_token=f27bcc9f6c7b6446279ee9448db195b118b8f17c; xq_token_expire=Sat%20Nov%2024%202018%2001%3A55%3A26%20GMT%2B0800%20(CST); xq_is_login=1; u=7147604028; __utmb=1.52.10.1540828390; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835763",
        "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; xq_token_expire=Sat%20Nov%2024%202018%2001%3A55%3A26%20GMT%2B0800%20(CST); __utmb=1.52.10.1540828390; _gat_gtag_UA_16079156_4=1; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=b2f21e25cd1817bf15c1c89cc72b25ad537495de; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat=b2f21e25cd1817bf15c1c89cc72b25ad537495de; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token=bb8e27cca180872ab70314097a5077578ff119c8; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1559188240; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835848",
        "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; xq_a_token=b70e7188d32f804237b6a42c052b5bcf74ebeea2; xqat=b70e7188d32f804237b6a42c052b5bcf74ebeea2; xq_r_token=b004ebba4649dfef7bba54f6ae7b703e5bca6a61; xq_token_expire=Sat%20Nov%2024%202018%2001%3A58%3A30%20GMT%2B0800%20(CST); xq_is_login=1; u=1497969916; __utmb=1.56.10.1540828390; _gat_gtag_UA_16079156_4=1; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835925",
        "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; __utma=1.312459015.1529772425.1540825606.1540828390.19; xq_token_expire=Sat%20Nov%2024%202018%2001%3A58%3A30%20GMT%2B0800%20(CST); aliyungf_tc=AQAAAAVyoiWa1w4AKvJZ2ozyzTPwnciM; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540829378,1540829450,1540836740,1540866196; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=4458f8df93a013c35835d0320917b19dcaab0a24; xq_a_token.sig=FfAS5LGC_XBO11rmXuA6Nb3o4VI; xqat=4458f8df93a013c35835d0320917b19dcaab0a24; xqat.sig=t2g7eE2UG80Frcg03R-7nudVIBA; xq_r_token=4812b56991883e9913998e8816706912bff911e8; xq_r_token.sig=R6AgMpKf0fhe6GkWdS_etJ0Y3Dw; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=6146826778; u.sig=h5P6Xki5cmObHzNcRMVufpWUnZc; _gat_gtag_UA_16079156_4=1; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540866325",
    ]
    # requests expects a {name: value} mapping.  The original passed the
    # whole header string as the value of one made-up cookie name
    # ("cookies_are" / "cookie_are"), so the leading `device_id` cookie was
    # never sent under its own name.
    cookies = _cookie_header_to_dict(random.choice(cookie_headers))

    # The context manager closes the session on every path, including the
    # timeout return below.
    with requests.Session() as s:
        try:
            obj = s.get(
                url,
                headers=random.choice(headers),
                cookies=cookies,
                stream=True,
                allow_redirects=False,
                timeout=20,
            )
            time.sleep(8 + random.random() * 3.2)
            # With stream=True the body is only downloaded when `.content`
            # is read, so parsing stays inside the timeout guard.
            bs = BeautifulSoup(obj.content, 'lxml')
        except requests.exceptions.Timeout:
            print([num, "timeout", "timeout"])
            return [num, "timeout", "timeout"]

    closed_marks = bs.find_all(attrs={"class": "cube-closed"})
    res_current = closed_marks[0].get_text() if closed_marks else "未关停!"

    creators = bs.find_all(attrs={"class": "creator fn-clear"})
    if creators:
        res_id = creators[0].attrs["href"]
        row = [num, res_id[1:], res_current]
    else:
        title = bs.find("title")
        if title is None:
            row = [num, "AttributeError", "page_error"]
        else:
            # "404_雪球" for a missing portfolio.  Any other title used to
            # fall through and return None; it is now reported the same way
            # so callers always receive a three-element row.
            row = [num, "NaN", title.get_text()]
    print(row)
    return row
def _is_failed(entry):
    """Return True when *entry* is a failure marker of the form [id, "异常"].

    The length check matters: a successful crawl that produced exactly one
    row is a one-element list, and indexing it with [1] would raise
    IndexError.
    """
    return len(entry) > 1 and entry[1] == "异常"


# Merge the crawl results of both runs.
# NOTE(review): `res_0` is not defined in the visible part of this file;
# presumably it holds the results of another crawl pass. Confirm.
res_final = []
res_final.extend(res)
res_final.extend(res_0)

# Drop empty results (and None, which cannot be indexed below).
result = [entry for entry in res_final if entry]

# Successful results vs. ids whose crawl failed.
final = [entry for entry in result if not _is_failed(entry)]
except_id = [entry for entry in result if _is_failed(entry)]

# Flatten one level: every successful result contributes its elements.
need = [item for entry in final for item in entry]

python爬取雪球网交易数据相关推荐

  1. python爬虫金融数据_python爬虫项目-爬取雪球网金融数据(关注、持续更新)

    (一)python金融数据爬虫项目 爬取目标:雪球网(起始url:https://xueqiu.com/hq#exchange=cn&firstname=1&secondname=1_ ...

  2. Python爬虫 爬取雪球网部分数据

    import requests import jsonurl = {'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?s ...

  3. 简单使用requests爬取雪球网数据,分析股票走势

    简单使用requests爬取雪球网数据,分析股票走势 import requests import pymongo import json # 数据库初始化 client = pymongo.Mon ...

  4. PyQt5制作一个爬虫小工具,爬取雪球网上市公司的财务数据

    本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,如有问题请及时联系我们以作处理. 以下文章来源于可以叫我才哥 ,作者:可以叫我才哥 最近有朋友需要帮忙写个爬虫脚本,爬取雪球网一些上 ...

  5. 技术图文:如何利用 C# 爬取 ONE 的交易数据?

    投资一个金融产品,最基本的就是拿到这个金融产品的交易数据,对这些数据进行可视化来判断趋势.去年,我在听 李笑来 讲区块链的课程上知道了 BigOne 这个由 INB 投资的交易所,而 ONE 是 Bi ...

  6. python实战|python爬取58同城租房数据并以Excel文件格式保存到本地

    python实战|python爬取58同城租房数据并以Excel文件格式保存到本地 一.分析目标网站url 目标网站:https://cq.58.com/minsuduanzu/ 让我们看看网站长啥样 ...

  7. python爬取微博热搜数据并保存!

    主要用到requests和bs4两个库,将获得的信息保存在d://hotsearch.txt下 import requests; import bs4 mylist=[] r=requests.get(ur- 很多 ...

  8. Python爬取京东任意商品数据实战总结

    利用Python爬取京东任意商品数据 今天给大家展示爬取京东商品数据 首先呢还是要分思路的,我分为以下几个步骤: 第一步:得到搜索指定商的url 第二步:获得搜索商品列表信息 第三步:对得到的商品数据 ...

  9. python 爬取24小时天气数据

    python 爬取24小时天气数据 1.引入相关库 # -*- coding: utf-8 -*- import requests import numpy as np 关于爬虫,就是在网页上找到自己 ...

  10. 用python爬取基金网信息数据,保存到表格,并做成四种简单可视化。(爬虫之路,永无止境!)

    用python爬取基金网信息数据,保存到表格,并做成四种简单可视化.(爬虫之路,永无止境!) 上次 2021-07-07写的用python爬取腾讯招聘网岗位信息保存到表格,并做成简单可视化. 有的人留 ...

最新文章

  1. 1-2-Active Directory 域服务准备概述
  2. OSSIM平台安全事件关联分析实践
  3. 10.IDA-基本操作
  4. 求助:关于sql如何统计时间的问题
  5. 8个超震撼的HTML5和纯CSS3动画源码
  6. 外架小横杆外露长度规范要求_安全文明施工规范
  7. 6本书,读懂2022年最火的边缘计算
  8. Hive导入csv文件
  9. 最大流模板(Edmonds-Karp)
  10. 使用MAT分析dump文件
  11. wincc用c语言弹出确认窗口,wincc怎样弹出确认窗口?
  12. 鸿蒙系统报名选择一个应用,申请鸿蒙系统有一个应用选择怎么选择呢
  13. 柴夥說算法(4)--再說樹
  14. 王牌战士服务器维修,《王牌战士》8月12日更新维护公告 英雄免费解锁两周年专属活动...
  15. c语言报告对老师的致谢,论文致谢对老师的感谢-论文老师的致谢词怎么写?
  16. PythonGUI颜色扩散游戏
  17. nginx openresty waf动态黑名单 白名单 、centos 、windows下部署
  18. 小布老师讲座笔记(四)
  19. Linux云主机开启IPv6服务
  20. Webservice与Servlet

热门文章

  1. 信息检索1.3.学术搜索引擎--谷歌学术搜索引擎
  2. gke google_使用ExternalDNS扩展GKE
  3. 一个windows右键关联工具
  4. 利用微信官方文档下载微信对账单、资金账单
  5. Storm运行原理探索
  6. 2021SC@SDUSC Zxing开源代码(十三)Aztec二维码(二)
  7. HashMap和TreeMap排序
  8. 计算机办公操作excel,办公中常用的Word及Excel的方法有哪些
  9. 求递推数列通项的特征根法
  10. 8051蜂鸣器程序c语言,求51单片机蜂鸣器程序