python 爬取知乎话题精华内容（爬取、知乎、精华）
import json
import csv
import requests
import re
import time
def getchina(str1):
    """Return only the Chinese characters of *str1*, concatenated in order.

    A character is kept when it falls in the CJK Unified Ideographs range
    U+4E00..U+9FA5 (same set as the original regex character class).
    """
    return ''.join(ch for ch in str1 if '\u4e00' <= ch <= '\u9fa5')
def gettime(timeStamp): # 将时间戳转为时间字符串
timeArray = time.localtime(timeStamp)
#otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
return otherStyleTime
def writecsv(data):
    """Append one CSV row per feed item in *data* to 'ifo.csv'.

    Each item is a Zhihu essence-feed entry. Answers carry no 'title' on the
    target: their title lives at target['question']['title'] and the timestamp
    at 'updated_time'; articles have 'title' and 'updated' directly on the
    target. Row layout matches the header written at module level:
    title, time, name, voteup, comment, content.
    """
    # Open the file ONCE with encoding='utf-8' — the original re-opened it for
    # every item and omitted the encoding, which corrupts output (or raises
    # UnicodeEncodeError) on platforms whose default encoding is not UTF-8,
    # and mismatches the utf-8 header row written at module level.
    with open('ifo.csv', 'a', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        for item in data:
            target = item['target']
            if 'title' not in target:
                # Answer entry: title comes from the parent question.
                title = target['question']['title']
                date = gettime(target['updated_time'])
            else:
                # Article entry: title/updated are on the target itself.
                title = target['title']
                date = gettime(target['updated'])
            csv_writer.writerow([title,
                                 date,
                                 target['author']['name'],
                                 target['voteup_count'],
                                 target['comment_count'],
                                 getchina(target['content'])])
# ---- script entry: crawl the Zhihu topic essence feed page by page into CSV ----

# Write the CSV header row once, before any data rows are appended.
with open('ifo.csv', 'a', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["title", "time", "name", "voteup", "comment", "content"])

# First page of the topic-21238418 essence feed (include= selects the fields used above).
url = "http://www.zhihu.com/api/v4/topics/21238418/feeds/essence?include=data%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Danswer%29%5D.target.content%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%3Bdata%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Danswer%29%5D.target.is_normal%2Ccomment_count%2Cvoteup_count%2Ccontent%2Crelevant_info%2Cexcerpt.author.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Darticle%29%5D.target.content%2Cvoteup_count%2Ccomment_count%2Cvoting%2Cauthor.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Dtopic_sticky_module%29%5D.target.data%5B%3F%28target.type%3Dpeople%29%5D.target.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Danswer%29%5D.target.annotation_detail%2Ccontent%2Chermes_label%2Cis_labeled%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Canswer_type%3Bdata%5B%3F%28target.type%3Danswer%29%5D.target.author.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Danswer%29%5D.target.paid_info%3Bdata%5B%3F%28target.type%3Darticle%29%5D.target.annotation_detail%2Ccontent%2Chermes_label%2Cis_labeled%2Cauthor.badge%5B%3F%28type%3Dbest_answerer%29%5D.topics%3Bdata%5B%3F%28target.type%3Dquestion%29%5D.target.annotation_detail%2Ccomment_count%3B&limit=10&offset=0"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
    # BUG FIX: the HTTP header is spelled "Referer" (the standard's own typo),
    # not "Refer" — the original key was silently ignored by the server.
    "Referer": "https://www.zhihu.com/",
}

resp = requests.get(url, headers=headers)
# resp.json() is equivalent to json.loads(resp.content.decode('utf-8')).
res = resp.json()
data = res['data']

count = 0
# The API signals the last page by returning a 'next' URL equal to the page
# just fetched; stop when that happens. (Page counter is progress output only.)
while res['paging']['next'] != url:
    count += 1
    print(count)
    writecsv(data)
    url = res['paging']['next']
    resp = requests.get(url, headers=headers)
    res = resp.json()
    data = res['data']
python爬知乎_python爬行求知。,爬取,知乎,精华相关推荐
- python爬虫电影信息_Python爬虫入门 | 爬取豆瓣电影信息
这是一个适用于小白的Python爬虫免费教学课程,只有7节,让零基础的你初步了解爬虫,跟着课程内容能自己爬取资源.看着文章,打开电脑动手实践,平均45分钟就能学完一节,如果你愿意,今天内你就可以迈入爬 ...
- python网络爬虫代理服务器_python爬虫如何抓取代理服务器
一年前突然有个灵感,想搞个强大的网盘搜索引擎,但由于大学本科学习软件工程偏嵌入式方向,web方面的能力有点弱,不会jsp,不懂html,好久没有玩过sql,但就是趁着年轻人的这股不妥协的劲儿,硬是把以 ...
- python爬虫贴吧_Python爬虫如何爬取贴吧内容
爬取贴吧内容 先了解贴吧url组成: 每个贴吧url都是以'https://tieba.baidu.com/f?'开头,然后是关键字 kw=''贴吧名字'',再后面是 &pn=页数 (pn=0 ...
- python爬虫实例手机_Python爬虫实现爬取京东手机页面的图片(实例代码)
实例如下所示: __author__ = 'Fred Zhao' import requests from bs4 import BeautifulSoup import os from urllib ...
- python爬虫金融数据_python爬虫项目-爬取雪球网金融数据(关注、持续更新)
(一)python金融数据爬虫项目 爬取目标:雪球网(起始url:https://xueqiu.com/hq#exchange=cn&firstname=1&secondname=1_ ...
- python爬虫外贸客户_python实战成功爬取海外批发商价格信息并写入记事本
运行平台:windows Python版本:Python 3.7.0 用到的第三方库:requests ,Beautiful Soup,re IDE:jupyter notebook 浏览器:Chro ...
- python 百度云盘 数据迁移_python爬虫,爬取百度云盘,找你兄弟的机器活塞运动原理文件?...
寻找并分析百度云的转存api 首先你得有一个百度云盘的账号,然后登录,用浏览器(这里用火狐浏览器做示范)打开一个分享链接.F12打开控制台进行抓包.手动进行转存操作:全选文件->保存到网盘-&g ...
- python外国网站爬虫_python 网络爬虫-爬取网页外部网站
前言 上一篇中我们在维基百科的内部网站上随机跳转进入文章类网页,而忽视外部网站链接.本篇文章将处理网站的外部链接并试图收集一些网站数据.和单个域名网站爬取不同,不同域名的网站结构千差万别,这就意味我们 ...
- python爬去新浪微博_Python 超简单爬取新浪微博数据 (高级版)
新浪微博的数据可是非常有价值的,你可以拿来数据分析.拿来做网站.甚至是*.不过很多人由于技术限制,想要使用的时候只能使用复制粘贴这样的笨方法.没关系,现在就教大家如何批量爬取微博的数据,大大加快数据迁 ...
最新文章
- 生产环境WEB服务管理脚本之日志检测脚本
- OSPF详解-2 区域结构
- 【博客话题】我的2011项目总结
- js代码收集(1)_隐藏div、table间隔样式设置
- json loads No JSON object could be decoded 问题解决
- python asyncio_Python 的异步 IO:Asyncio 简介
- koa --- [MVC实现之四]Router、Controller、Service的实现
- ref是什么意思_终于有人说出A股不敢说的话:为什么股价不断下跌,大单却持续流入,你知道是什么缘由吗?...
- 利用win7系统自身修复还原功能
- Python 解释器中使用help()命令如何退出
- 我有几个粽子,和一个故事
- ArcGIS行政区位图制作流程(附行政区划练习数据)
- C#语言与Java语言程序的比较[转自chinaitlab]
- AI视频增强,提高视频画面的清晰度
- 谈一谈凑单页的那些优雅设计
- c语言消消乐字母游戏代码,基于pygame的小游戏———数字消消乐
- QQ不能远程控制的解决办法
- 解放双手,Windows Admin Center简化服务器管理
- 凤舞丹心东方美,中华才女竞风采 ——黛兰娜杯《中华才女大赛》七月汇演
- 软件项目需求管理培训
热门文章
- 国外设计公司H5网站模板
- C Primer Plus 第六版编程练习第五章答案
- c语言程序电机,直流电机控制C语言程序
- 【trajectory optimization】1 intro
- java版铁傀儡刷新机制,我的世界:新版村庄的铁傀儡数量都快赶上村民了?刷新效率很高!...
- JS生成条形码/二维码 barcode.js、JsBarcode
- Online Learning and Pricing with Reusable Resources: Linear Bandits with Sub-Exponential Rewards: Li
- 爬取听书网有声小说音频数据
- 计算机 图像处理 ei 期刊,【EA-ISET协会】中科院3区视觉图像处理类SCIEI源刊征稿...
- 工商总局抽检电商 天猫1号店等仍存售假