import json
import csv
import requests
import re
import time

def getchina(str1):  # keep only the Chinese characters in a string
    return ''.join(re.findall(r'[\u4e00-\u9fa5]', str1))

def gettime(timeStamp):  # convert a Unix timestamp to a date string
    timeArray = time.localtime(timeStamp)
    # use "%Y-%m-%d %H:%M:%S" instead to keep the time of day
    return time.strftime("%Y-%m-%d", timeArray)

def writecsv(data):  # append one CSV row per answer/article record
    with open('ifo.csv', 'a', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        for item in data:
            target = item['target']
            if 'title' not in target:
                # answer: the title lives on the parent question and the
                # update time is under 'updated_time'
                csv_writer.writerow([target['question']['title'],
                                     gettime(target['updated_time']),
                                     target['author']['name'],
                                     target['voteup_count'],
                                     target['comment_count'],
                                     getchina(target['content'])])
            else:
                # article: 'title' and 'updated' sit on the target itself
                csv_writer.writerow([target['title'],
                                     gettime(target['updated']),
                                     target['author']['name'],
                                     target['voteup_count'],
                                     target['comment_count'],
                                     getchina(target['content'])])
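# For reference, the two record shapes the branch above distinguishes look
# roughly like this; the field names come from the accesses in writecsv,
# while the values are illustrative placeholders rather than real API output:
#
# answer (no 'title' on target; the title sits on the parent question):
#   {"target": {"question": {"title": "..."}, "updated_time": 1577836800,
#               "author": {"name": "..."}, "voteup_count": 0,
#               "comment_count": 0, "content": "<p>...</p>"}}
# article ('title' and 'updated' sit on the target itself):
#   {"target": {"title": "...", "updated": 1577836800,
#               "author": {"name": "..."}, "voteup_count": 0,
#               "comment_count": 0, "content": "<p>...</p>"}}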

# write the header row once before crawling starts
with open('ifo.csv', 'a', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["title", "time", "name", "voteup", "comment", "content"])

url = "http://www.zhihu.com/api/v4/topics/21238418/feeds/essence?include=data%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Danswer%29%5D.target.content%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Danswer%29%5D.target.is_normal%2Ccomment_count%2Cvoteup_count%2Ccontent%2Crelevant_info%2Cexcerpt.author.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Darticle%29%5D.target.content%2Cvoteup_count%2Ccomment_count%2Cvoting%2Cauthor.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Dpeople%29%5D.target.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Danswer%29%5D.target.annotation_detail%2Ccontent%2Chermes_label%2Cis_labeled%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Canswer_type%3Bdata%5B%3F%28target.type%3Danswer%29%5D.target.author.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Danswer%29%5D.target.paid_info%3Bdata%5B%3F%28target.type%3Darticle%29%5D.target.annotation_detail%2Ccontent%2Chermes_label%2Cis_labeled%2Cauthor.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Dquestion%29%5D.target.annotation_detail%2Ccomment_count%3B&limit=10&offset=0"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
    "Referer": "https://www.zhihu.com/",
}

resp = requests.get(url, headers=headers)
res = resp.json()
data = res['data']

count = 0
# keep paging until the API's next link points back at the page just fetched
while res['paging']['next'] != url:
    count += 1
    print(count)  # progress indicator: pages fetched so far
    writecsv(data)
    url = res['paging']['next']
    resp = requests.get(url, headers=headers)
    res = resp.json()
    data = res['data']
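The stop condition above assumes the final page points its next link back at the URL just fetched. A sturdier variant, sketched below, stops on the paging object's is_end flag instead and pauses between requests; the is_end field and the one-second delay are my assumptions, not something the original script relies on. It reuses headers and writecsv from above.

def crawl(start_url, delay=1.0):  # hedged alternative to the loop above
    url = start_url
    count = 0
    while True:
        res = requests.get(url, headers=headers).json()
        writecsv(res['data'])
        count += 1
        print(count)
        # 'is_end' is assumed here; default to True so a missing key
        # stops the loop instead of spinning forever
        if res['paging'].get('is_end', True):
            break
        url = res['paging']['next']
        time.sleep(delay)  # be polite between requests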
