微博爬虫及简单数据分析

刚开始学python，选了这个题目，把代码放上来留念，没有用到很流行的框架，所以代码量挺大
GUI用wxpython写的

# _*_ coding: UTF-8 _*_
import os
import re
import requests
import sys
import wx
import traceback
from datetime import datetime
from datetime import timedelta
from lxml import etree
import data_analysis

# Path of the text file that holds the scraped data.  It is assigned by
# Wb.get_cookie, written by Wb.write_txt and read by the Wb.figure_* handlers.
# (The module-level ``global file_path`` statement was a no-op and is dropped.)
file_path = ''
class Wb(wx.App):
    """wxPython application that scrapes one Weibo account and shows the result.

    The GUI methods live here; the scraping methods they call (GetName,
    GetSimple_Info, weibo_para, write_txt) are defined further down in the
    same class.
    """

    def Operate(self):
        """Initialise the scraper state and build the input window."""
        self.cookie = {}
        self.username = ''      # user nickname, e.g. "Dear-迪丽热巴"
        self.Number = 0         # total number of posts of the user
        self.number1 = 0        # number of posts actually scraped
        self.Guanzhu = 0        # number of accounts the user follows
        self.fans = 0           # number of followers
        self.Content = []       # text of each post
        self.Time = []          # publish time of each post
        self.star = []          # like count of each post
        self.Zhuanfa = []       # repost count of each post
        self.Pinglun = []       # comment count of each post
        self.publish_tool = []  # client each post was published from
        self.Id = 0
        self.a = 0              # index of the post shown by weibo_info

        # ------------------------------ GUI ------------------------------
        # Main window and its panel.
        self.frame_operate = wx.Frame(None, title="Weibo_Spider_GUI", size=(500, 500))
        self.panel_operate = wx.Panel(self.frame_operate, -1)
        # Large heading font.
        self.font1 = wx.Font(18, wx.ROMAN, wx.ITALIC, wx.NORMAL)
        self.label1 = wx.StaticText(self.panel_operate, -1, "WeiBo Spider",
                                    pos=(180, 60), style=wx.ALIGN_CENTER)
        self.label1.SetFont(self.font1)
        # Cookie label and text box.
        self.label2 = wx.StaticText(self.panel_operate, -1, "请输入您微博登陆的有效cookie",
                                    pos=(160, 130), style=wx.ALIGN_CENTER)
        self.textCookie = wx.TextCtrl(self.panel_operate, -1, pos=(200, 150),
                                      size=(80, 20), style=wx.TE_CENTER)
        # Id of the account to scrape.
        self.label3 = wx.StaticText(self.panel_operate, -1, "请输入您所要爬取微博账号的self.Id",
                                    pos=(160, 180), style=wx.ALIGN_CENTER)
        self.textId = wx.TextCtrl(self.panel_operate, -1, pos=(200, 200),
                                  size=(80, 20), style=wx.TE_CENTER)
        # Directory the data file is stored in.
        self.label4 = wx.StaticText(self.panel_operate, -1, "数据文件保存路径",
                                    pos=(160, 230), style=wx.ALIGN_CENTER)
        self.textFile_path = wx.TextCtrl(self.panel_operate, -1, pos=(200, 250),
                                         size=(80, 20), style=wx.TE_CENTER)
        # NOTE(review): the published code never bound get_cookie to a control
        # and never showed the frame, so the scrape could not be started.  The
        # button label and position are a reconstruction - adjust as needed.
        self.button_start = wx.Button(self.panel_operate, -1, "开始爬取", pos=(200, 300))
        self.Bind(wx.EVT_BUTTON, self.get_cookie, self.button_start)
        self.frame_operate.Show()

    def get_cookie(self, event):
        """Read cookie, account id and output directory, then start scraping.

        Raises ValueError when the id box does not contain an integer.
        """
        global file_path
        self.cookie = {"Cookie": self.textCookie.GetValue()}
        self.Id = int(self.textId.GetValue())
        # os.path.join keeps an empty directory relative instead of producing
        # a path at the filesystem root.
        file_path = os.path.join(self.textFile_path.GetValue(), "%d" % self.Id + ".txt")
        self.Onbutton_Start()

    def Onbutton_Start(self):
        """Run the whole scrape and then show the result windows."""
        self.GetName()          # nickname
        self.GetSimple_Info()   # post / following / follower counts
        self.weibo_para()       # every post with its statistics
        self.write_txt()
        self.weibo_UI1()

    def weibo_UI1(self):
        """Tell the user scraping has finished and open the summary window."""
        message = "文件爬取完毕"
        wx.MessageBox(message)
        self.weibo_UI2()

    def weibo_UI2(self):
        """Replace the input window with a summary of the scraped account."""
        self.frame_operate.Destroy()
        self.frame_Info = wx.Frame(None, title="User_Information", size=(500, 500))
        self.panel_Info = wx.Panel(self.frame_Info, -1)
        t1 = "用户昵称：" + str(self.username)
        t2 = "微博数:" + str(self.Number)
        t3 = "粉丝数:" + str(self.fans)
        t4 = "关注数：" + str(self.Guanzhu)
        self.label16 = wx.StaticText(self.panel_Info, -1, self.username,
                                     pos=(200, 100), style=wx.ALIGN_LEFT)
        self.label5 = wx.StaticText(self.panel_Info, -1, t1, pos=(180, 130), style=wx.ALIGN_LEFT)
        self.label13 = wx.StaticText(self.panel_Info, -1, t2, pos=(180, 150), style=wx.ALIGN_LEFT)
        self.label14 = wx.StaticText(self.panel_Info, -1, t3, pos=(180, 170), style=wx.ALIGN_LEFT)
        self.label15 = wx.StaticText(self.panel_Info, -1, t4, pos=(180, 190), style=wx.ALIGN_LEFT)
        # font2 is the small font, font1 (created in Operate) the large one.
        self.font2 = wx.Font(13, wx.SCRIPT, wx.ITALIC, wx.NORMAL)
        self.label16.SetFont(self.font1)
        for label in (self.label5, self.label13, self.label14, self.label15):
            label.SetFont(self.font2)
        self.button_news = wx.Button(self.panel_Info, -1, "查看最近微博", pos=(220, 280))
        self.Bind(wx.EVT_BUTTON, self.weibo_UI3, self.button_news)
        self.frame_Info.Show()

    def weibo_UI3(self, event):
        """Show the newest (or pinned) post and the navigation buttons."""
        self.frame_Info.Destroy()
        self.frame_news = wx.Frame(None, title="---", size=(500, 500))
        self.panel_news = wx.Panel(self.frame_news, -1)
        label18 = wx.StaticText(self.panel_news, -1, "最新微博动态", pos=(200, 40))
        # The detail widgets are only built when at least one post was
        # scraped; building them unconditionally raised NameError otherwise.
        if self.Content:
            text1 = "最新/置顶 微博为: " + self.Content[0]
            text2 = "最新/置顶 微博发布工具: " + self.publish_tool[0]
            text3 = "最新/置顶 微博发布时间: " + self.Time[0]
            text4 = "最新/置顶 微博获得赞数: " + str(self.star[0])
            text5 = "最新/置顶 微博获得转发数: " + str(self.Zhuanfa[0])
            text6 = "最新/置顶 微博获得评论数: " + str(self.Pinglun[0])
            self.label6 = wx.TextCtrl(self.panel_news, -1, text1, pos=(90, 60), size=(250, 140),
                                      style=wx.TE_MULTILINE | wx.TE_RICH)
            self.label7 = wx.StaticText(self.panel_news, -1, text2, pos=(90, 200), style=wx.ALIGN_LEFT)
            self.label8 = wx.StaticText(self.panel_news, -1, text3, pos=(90, 220), style=wx.ALIGN_LEFT)
            self.label9 = wx.StaticText(self.panel_news, -1, text4, pos=(90, 240), style=wx.ALIGN_LEFT)
            self.label10 = wx.StaticText(self.panel_news, -1, text5, pos=(90, 260), style=wx.ALIGN_LEFT)
            self.label11 = wx.StaticText(self.panel_news, -1, text6, pos=(90, 280), style=wx.ALIGN_LEFT)
        # Browse the posts one by one.
        self.Button_info = wx.Button(self.panel_news, -1, "点击查看之前的微博内容", pos=(220, 340))
        self.Bind(wx.EVT_BUTTON, self.weibo_pre_info, self.Button_info)
        # Open the chart menu.
        self.Button_file = wx.Button(self.panel_news, -1, "点击查看微博数据分析图表", pos=(220, 380))
        self.Bind(wx.EVT_BUTTON, self.analysis_UI, self.Button_file)
        self.frame_news.Show()

    def analysis_UI(self, event):
        """Open the window offering the four data-analysis charts."""
        self.frame_data = wx.Frame(None, title="data_analysis--20177830115", size=(500, 500))
        self.panel_data = wx.Panel(self.frame_data, -1)
        text1 = "2017-2018微博转发/点赞量折线统计图"
        text2 = '原创微博与转发微博统计图'
        text3 = '微博发布工具统计图'
        text4 = '微博使用心情统计图'
        self.button_1 = wx.Button(self.panel_data, -1, text1, pos=(180, 120))
        self.button_2 = wx.Button(self.panel_data, -1, text2, pos=(180, 160))
        self.button_3 = wx.Button(self.panel_data, -1, text3, pos=(180, 200))
        self.button_4 = wx.Button(self.panel_data, -1, text4, pos=(180, 240))
        self.Bind(wx.EVT_BUTTON, self.figure_1, self.button_1)
        self.Bind(wx.EVT_BUTTON, self.figure_2, self.button_2)
        self.Bind(wx.EVT_BUTTON, self.figure_3, self.button_3)
        self.Bind(wx.EVT_BUTTON, self.figure_4, self.button_4)
        self.frame_data.Show()

    def _make_analysis(self):
        """Return a data_analysis.analysis object for the current data file."""
        return data_analysis.analysis(file_path, self.Number)

    def figure_1(self, event):
        """Show the repost/like line chart."""
        self._make_analysis().analyse_Zhexian()

    def figure_2(self, event):
        """Show the original-vs-repost pie chart."""
        self._make_analysis().analyse_YC()

    def figure_3(self, event):
        """Show the publishing-tool pie chart."""
        self._make_analysis().analyse_GJ()

    def figure_4(self, event):
        """Show the emoticon pie chart."""
        self._make_analysis().analyse_XQ()

    def weibo_pre_info(self, event):
        """Button handler that opens the post browser.

        weibo_info is also re-entered from cont without an event, so it cannot
        be bound to the button directly.
        """
        self.weibo_info()

    def weibo_info(self):
        """Show post number ``self.a`` in its own window."""
        if self.a >= len(self.Content):
            # Nothing (more) to show: tell the user and start over next time
            # instead of raising IndexError.
            wx.MessageBox("没有更多微博了")
            self.a = 0
            return
        self.s = wx.Frame(None, title="---", size=(500, 500))
        self.f = wx.Panel(self.s, -1)
        text1 = str(self.a + 1) + ":" + self.Content[self.a]
        text2 = "发布工具: " + self.publish_tool[self.a]
        text3 = "发布时间: " + self.Time[self.a]
        text4 = "点赞数: " + str(self.star[self.a])
        text5 = "转发数: " + str(self.Zhuanfa[self.a])
        text6 = "评论数: " + str(self.Pinglun[self.a])
        self.labela = wx.TextCtrl(self.f, -1, text1, pos=(80, 60), size=(250, 140),
                                  style=wx.TE_MULTILINE | wx.TE_RICH)
        self.labelb = wx.StaticText(self.f, -1, text2, pos=(80, 200), style=wx.ALIGN_LEFT)
        self.labelc = wx.StaticText(self.f, -1, text3, pos=(80, 220), style=wx.ALIGN_LEFT)
        self.labeld = wx.StaticText(self.f, -1, text4, pos=(80, 240), style=wx.ALIGN_LEFT)
        self.labele = wx.StaticText(self.f, -1, text5, pos=(80, 260), style=wx.ALIGN_LEFT)
        self.labelf = wx.StaticText(self.f, -1, text6, pos=(80, 280), style=wx.ALIGN_LEFT)
        self.button_next = wx.Button(self.f, -1, "查看下一条", pos=(300, 380))
        self.button_exit = wx.Button(self.f, -1, "关闭", pos=(100, 380))
        self.Bind(wx.EVT_BUTTON, self.exit, self.button_exit)
        self.Bind(wx.EVT_BUTTON, self.cont, self.button_next)
        self.s.Show()

    def exit(self, event):
        """Close the post browser window."""
        self.s.Destroy()

    def cont(self, event):
        """Advance to the next post by replacing the browser window."""
        self.a += 1
        self.s.Destroy()
        self.weibo_info()

具体爬虫部分，参考github某大佬的

# 获取用户昵称def GetName(self):url      = "https://weibo.cn/%d/info" % (self.Id)html     = requests.get(url, cookies=self.cookie).contentselector = etree.HTML(html)  username = selector.xpath("//title/text()")[0]self.username = username[:-3]  def GetSimple_Info(self):url      = "https://weibo.cn/u/%d?&page=1" % (self.Id)html     = requests.get(url, cookies=self.cookie).contentselector = etree.HTML(html)  # 转化为标准的HTMLpattern  = r"\d+\.?\d*"      # 微博数wb_num = selector.xpath("//div[@class='tip2']/span[@class='tc']/text()")[0]# <div class="tip2"><span class="tc">微博[1543]</span>&nbspregx   = re.findall(pattern, wb_num, re.S | re.M)  # 只要数字（字符）for value in regx:num_wb = int(value)breakself.Number = num_wb# 关注数str_gz = selector.xpath("//div[@class='tip2']/a/text()")[0]regx = re.findall(pattern, str_gz, re.M)self.Guanzhu = int(regx[0])# 粉丝数# 获取"长微博"全部文字内容def GetLong(self, weibo_link):html = requests.get(weibo_link, cookies=self.cookie).contentselector = etree.HTML(html)info = selector.xpath("//div[@class='c']")[1]wb_content = info.xpath("div/span[@class='ctt']")[0].xpath("string(.)").replace(u"\u200b", "").encode(sys.stdout.encoding, "ignore").decode(sys.stdout()return wb_content# 获取转发微博信息def GetZhuanfa(self, is_retweet, info, wb_content):original_user = is_retweet[0].xpath("a/text()")if not original_user:wb_content = u"转发微博已被删除"return wb_contentelse:original_user = original_user[0]retweet_reason = info.xpath("div")[-1].xpath("string(.)").replace(u"\u200b", "").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)retweet_reason = retweet_reason[:retweet_reason.rindex(u"赞")]wb_content = (retweet_reason + "\n" + u"原始用户: " +original_user + "\n" + u"转发内容: " + wb_content)return wb_content#一个界面展示一条微博的发布时间、点赞数、转发数、评论数def weibo_para(self):url = "https://weibo.cn/u/%d?&page=1" % (self.Id)html = requests.get(url, cookies=self.cookie).contentselector = etree.HTML(html)if selector.xpath("//input[@name='mp']") == []:page_num = 1else:page_num = 
(int)(selector.xpath("//input[@name='mp']")[0].attrib["value"])pattern = r"\d+\.?\d*"for page in range(1, page_num + 1):url2 = "https://weibo.cn/u/%d?&page=%d" % (self.Id, page)html2 = requests.get(url2, cookies=self.cookie).contentselector2 = etree.HTML(html2)info = selector2.xpath("//div[@class='c']")is_empty = info[0].xpath("div/span[@class='ctt']")if is_empty:for i in range(0, len(info) - 2):# 微博内容str_t = info[i].xpath("div/span[@class='ctt']")Content = str_t[0].xpath("string(.)").replace(u"\u200b", "").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)Content = Content[:-1]weibo_Id = info[i].xpath("@id")[0][2:]a_link = info[i].xpath("div/span[@class='ctt']/a")is_retweet = info[i].xpath("div/span[@class='cmt']")if a_link:if a_link[-1].xpath("text()")[0] == u"全文":if not is_retweet:wb_content = wb_content[1:]Content = wb_contentif is_retweet:Content = self.GetZhuanfa(is_retweet, info[i], Content)self.Content.append(Content)# 微博发布时间str_time = info[i].xpath("div/span[@class='ct']")str_time = str_time[0].xpath("string(.)").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)Time = str_time.split(u'来自')[0]if u"刚刚" in Time:Time = datetime.now().strftime('%Y-%m-%d %H:%M')elif u"分钟" in Time:minute = Time[:Time.find(u"分钟")]minute = timedelta(minutes=int(minute))Time = (datetime.now() - minute).strftime("%Y-%m-%d %H:%M")elif u"今天" in Time:today = datetime.now().strftime("%Y-%m-%d")time = Time[3:]Time = today + " " + timeelif u"月" in Time:year = datetime.now().strftime("%Y")month = Time[0:2]day = Time[3:5]time = Time[7:12]Time = (year + "-" + month + "-" + day + " " + time)else:Time = Time[:16]self.Time.append(Time)str_footer = info[i].xpath("div")[-1]str_footer = str_footer.xpath("string(.)").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)# 微博发布工具if len(str_time.split(u'来自')) > 1:publish_tool = str_time.split(u'来自')[1]else:publish_tool = u"无"self.publish_tool.append(publish_tool)str_footer = info[i].xpath("div")[-1]str_footer 
= str_footer.xpath("string(.)").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)str_footer = str_footer[str_footer.rfind(u'赞'):]guid = re.findall(pattern, str_footer, re.M)    # 点赞数star = int(regx[0])self.star.append(star)# 转发数Zhuanfa = int(regx[1])self.Zhuanfa.append(Zhuanfa)# 评论数Pinglun = int(regx[2])self.Pinglun.append(Pinglun)self.number1 += 1def write_txt(self):try:contents_header = u"\n\n微博内容: \n"contents = (u"用户信息\n用户昵称：" + self.username +u"\n用户Id: " + str(self.Id) +u"\n微博数: " + str(self.Number) +u"\n关注数: " + str(self.Guanzhu) +u"\n粉丝数: " + str(self.fans) + contents_header + '\n')for i in range(1, self.number1 + 1):text = (str(i) + ":" + self.Content[i - 1] + "\n" +u"发布工具: " + self.publish_tool[i - 1] + "\n" +u"发布时间: " + self.Time[i - 1] + "\n" +u"点赞数: " + str(self.star[i - 1]) +u"转发数: " + str(self.Zhuanfa[i - 1]) +u"评论数: " + str(self.Pinglun[i - 1]) + "\n\n")contents = contents + textglobal file_pathf = open(file_path, "wb")f.write(contents.encode(sys.stdout.encoding))f.close()except Exception as e:print("Error: ", e)traceback.print_exc()

测试函数部分

def main():
    """Create the application, build its first window and run the event loop."""
    weibo = Wb()
    weibo.Operate()
    weibo.MainLoop()


if __name__ == "__main__":
    main()

数据分析

import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from matplotlib import font_manager as fm
import time
from datetime import datetime
import webbrowser


class analysis(object):
    """Draw charts from the text file written by the Weibo scraper.

    file_name -- path of the UTF-8 data file
    number    -- total number of posts of the account
    """

    def __init__(self, file_name, number):
        self.file_name = file_name
        self.number = number
        self.X_data = []    # sampled publish times (x axis of the line chart)
        self.Y1_data = []   # sampled like counts, in thousands
        self.Y_data = []    # sampled repost counts, in thousands
        self.str = ""

    def _read_text(self):
        """Return the whole data file as one string."""
        with open(self.file_name, encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def _parse_series(text):
        """Return ``(times, reposts, likes)`` extracted from *text*.

        times are the raw ``YYYY-MM-DD HH:MM`` strings, the other two are
        lists of ints, all in file order.
        """
        reposts = [int(v) for v in re.findall(r'转发数: (\d+)', text)]
        likes = [int(v) for v in re.findall(r'点赞数: (\d+)', text)]
        times = re.findall(
            r'发布时间: (\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})', text)
        return times, reposts, likes

    @staticmethod
    def _downsample(times, reposts, likes):
        """Pick evenly spaced samples from the posts that precede the first 2016 entry.

        Returns ``(x, y_reposts, y_likes)``: datetime objects and the matching
        counts divided by 1000.  All three lists are empty when there is
        nothing to plot.
        """
        # Everything from the first 2016 post onwards is ignored; when there
        # is none, the whole series is used.
        stop = len(times)
        for index, stamp in enumerate(times):
            if "2016" in stamp:
                stop = index
                break
        count = min(stop, len(reposts), len(likes))
        if count == 0:
            return [], [], []
        # Keep about 18% of the points, but at least one, so the step below
        # never divides by zero for short series.
        groups = max(1, int(0.18 * count))
        step = count // groups
        picked = [i * step for i in range(groups)]
        x = [datetime.strptime(times[i], "%Y-%m-%d %H:%M") for i in picked]
        y_reposts = [reposts[i] / 1000 for i in picked]
        y_likes = [likes[i] / 1000 for i in picked]
        return x, y_reposts, y_likes

    @staticmethod
    def _count_above(items, threshold):
        """Return ``{item: occurrences}`` for the items seen more than *threshold* times."""
        counts = {}
        for item in items:
            counts[item] = counts.get(item, 0) + 1
        return {key: n for key, n in counts.items() if n > threshold}

    def analyse_Zhexian(self):
        """Show a line chart of repost and like counts over publish time."""
        times, reposts, likes = self._parse_series(self._read_text())
        # Assign rather than append so repeated calls do not accumulate data;
        # the repost series is filled as well (it used to stay empty, which
        # made the first plot call fail on mismatched lengths).
        self.X_data, self.Y_data, self.Y1_data = self._downsample(times, reposts, likes)
        if not self.X_data:
            return

        fig1 = plt.figure(figsize=(8, 5))
        plt.rcParams['font.sans-serif'] = ['SimHei']
        ax1 = fig1.add_subplot(1, 1, 1)
        ax1.xaxis.set_major_formatter(mdate.DateFormatter('%Y-%m-%d %H-%M'))
        plt.xticks(self.X_data, rotation=90)
        plt.yticks(np.linspace(0, 5000, 5, endpoint=True))
        plt.title(u"2017-2018微博转发/点赞量折线图", color="black")
        plt.plot(self.X_data, self.Y_data, "o-", color='skyblue', label="转发量", markersize=1.5)
        plt.plot(self.X_data, self.Y1_data, "o-", color='pink', label="点赞量", markersize=1.5)
        plt.xlabel("发布时间")
        plt.ylabel("数量(千/条)")
        plt.legend()
        plt.show()

    def analyse_YC(self):
        """Show a pie chart of reposted versus original posts."""
        Number_Zhuanfa = len(re.findall(r'转发理由', self._read_text()))
        # Never hand a negative size to pie() when the file holds more
        # reposts than self.number claims posts.
        Yuanchuang = max(self.number - Number_Zhuanfa, 0)
        plt.rcParams['font.sans-serif'] = ['SimHei']
        labels = ['转发微博', '原创微博']
        sizes = [Number_Zhuanfa, Yuanchuang]
        explode = (0.1, 0)
        plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                shadow=False, startangle=150)
        plt.title(u"原创与转发微博量", color="black")
        plt.show()

    def analyse_GJ(self):
        """Show a pie chart of the publishing tools used more than 10 times."""
        tools = re.findall(r'发布工具: (.*)\n发布时间', self._read_text())
        gongju = self._count_above(tools, 10)
        labels = list(gongju.keys())
        sizes = list(gongju.values())
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=150)
        plt.title(u"微博发布工具统计", color="black")
        plt.show()

    def analyse_XQ(self):
        """Show a pie chart of the emoticons (``[name]``) used more than twice."""
        # Every bracketed name of 1-4 characters is counted.  The previous
        # pattern only captured the first and the last emoticon of lines
        # holding at least two and skipped all the others.
        emoticons = re.findall(r'\[([^\[\]]{1,4})\]', self._read_text())
        biaoqing = self._count_above(emoticons, 2)
        labels = list(biaoqing.keys())
        sizes = list(biaoqing.values())

        plt.rcParams['font.sans-serif'] = ['SimHei']
        fig1, ax1 = plt.subplots()
        patches, texts, autotexts = ax1.pie(sizes, labels=labels, autopct='%1.0f%%',
                                            shadow=False, startangle=170)
        ax1.axis('equal')
        # Use a smaller font for the slice labels.
        proptease = fm.FontProperties()
        proptease.set_size('small')
        plt.title(u"微博表情使用次数", color="black")
        plt.setp(autotexts, fontproperties=proptease)
        plt.setp(texts, fontproperties=proptease)
        plt.show()

python程序打包

#在cmd下安装pyinstaller
pip install pyinstaller
#打包成一个可执行文件 -F (注意将cmd窗口切换至文件保存的路径下)
pyinstaller -F filename.py

本篇只适合新手简单学习。笔者也刚学，又赶上复习周，后期会逐渐完善，毕竟UI写得太丑了！
另：获取本地用户cookie和微博账号id的操作比较简单，在此不再做详细解释。如果程序跑不出来，相信我，一定是cookie的问题。

仅供作业参考，抄袭需谨慎

微博爬虫及简单数据分析相关推荐

python大作业爬虫_Python大作业---微博爬虫及简单数据分析
刚开始学python,选了这个题目,把代码放上来留念,没有用到很流行的框架,所以代码量挺大 GUI用wxpython写的 # _*_ coding: UTF-8 _*_ import os impor ...
自己编写一个简单的微博爬虫
自己编写一个简单的微博爬虫很多做社交媒体数据分析的同学需要采集一些新浪微博上的数据,新浪微博虽然有提供api,但免费的api对获取的数据项和获取的频率都有很大的限制,商业版api据说限制较少,但是作 ...
微博爬虫数据分析可视化程序设计报告
文章目录 1 需求分析 1.1 引言 1.2 功能需求 1.3运行需求 2 详细设计 2.1界面设计 2.2 程序设计 2.3 容错性 3 总结具体代码 analysis.py keywords_n ...
用户、话题、评论一网打尽，分享一个最强微博爬虫
实现的功能微博向来是一个极好的吃瓜圣地,为了获取微博上行行色色的数据,微博相关的爬虫也是层出不穷,因为无论是运营者还是数据分析从业者都或多或少需要微博数据,我的许多朋友也不例外,经过断断续续的努力, ...
分分钟百万条数据的微博爬虫分析
微博爬虫系列2-分析微博接口大家好,我是W 经过上一篇的分析我们无意中发现了微博的接口,并且不做任何的账号.流量限制可以直接获取最完整的微博数据.接下来我们就通过分析微博粉丝接口查看其中的数据结构顺 ...
python3爬虫及数据分析_Python3爬虫及可视化数据分析系列图文教程——大纲目录...
本文首发于:Python3爬虫及可视化数据分析系列图文教程--大纲目录 - Python量化投资www.lizenghai.com 写在最前最近更新:2019-03-28 本教程所有源码见文尾. ...
【网络爬虫】【java】微博爬虫（一）：小试牛刀——网易微博爬虫（自定义关键字爬取微博数据）（附软件源码）...
一.写在前面 (本专栏分为"java版微博爬虫"和"python版网络爬虫"两个项目,系列里所有文章将基于这两个项目讲解,项目完整源码已经整理到我的Github ...
微博爬虫“免登录”技巧详解及Java实现
本文源地址:http://www.fullstackyang.com/...,转发请注明该地址或segmentfault地址,谢谢! 一.微博一定要登录才能抓取? 目前,对于微博的爬虫,大部分是基于模 ...
一步一步学python爬虫_初学Python之爬虫的简单入门
初学Python之爬虫的简单入门一.什么是爬虫? 1.简单介绍爬虫爬虫的全称为网络爬虫,简称爬虫,别名有网络机器人,网络蜘蛛等等. 网络爬虫是一种自动获取网页内容的程序,为搜索引擎提供了重要的数据 ...

微博爬虫及简单数据分析

微博爬虫及简单数据分析相关推荐

最新文章

热门文章