刚开始学python,选了这个题目,把代码放上来留念,没有用到很流行的框架,所以代码量挺大
GUI用wxpython写的

# _*_ coding: UTF-8 _*_
import os
import re
import requests
import sys
import wx
import traceback
from datetime import datetime
from datetime import timedelta
from lxml import etree
import data_analysis

# Path of the text file the crawl result is written to. It is set by
# Wb.get_cookie and read by Wb.write_txt and the figure_* handlers.
# (A module-level ``global`` statement is a no-op, so it is not kept.)
file_path = ''
class Wb(wx.App):
    """wxPython front end for the weibo.cn crawler.

    The GUI collects a cookie, a numeric account ID and an output directory,
    runs the crawl (GetName, GetSimple_Info, weibo_para and write_txt, which
    are defined further down in this class) and then lets the user browse the
    crawled posts and open the charts provided by ``data_analysis``.
    """

    def Operate(self):
        """Initialise the crawl state, then build and show the start window."""
        self.cookie = {}
        self.username = ''      # display name, e.g. "Dear-迪丽热巴"
        self.Number = 0         # total number of posts reported by the profile
        self.number1 = 0        # number of posts actually crawled
        self.Guanzhu = 0        # following count
        self.fans = 0           # follower count
        self.Content = []       # text of each post
        self.Time = []          # publish time of each post
        self.star = []          # like count of each post
        self.Zhuanfa = []       # repost count of each post
        self.Pinglun = []       # comment count of each post
        self.publish_tool = []  # client each post was published from
        self.Id = 0
        self.a = 0              # index of the post shown by weibo_info

        # ---------------------------- GUI ----------------------------
        # Start window and its panel.
        self.frame_operate = wx.Frame(
            None, title="Weibo_Spider_GUI", size=(500, 500))
        self.panel_operate = wx.Panel(self.frame_operate, -1)
        # Large title font (font2, created in weibo_UI2, is the small one).
        self.font1 = wx.Font(18, wx.ROMAN, wx.ITALIC, wx.NORMAL)
        self.label1 = wx.StaticText(
            self.panel_operate, -1, "WeiBo Spider",
            pos=(180, 60), style=wx.ALIGN_CENTER)
        self.label1.SetFont(self.font1)
        # Cookie label and text box.
        self.label2 = wx.StaticText(
            self.panel_operate, -1, "请输入您微博登陆的有效cookie",
            pos=(160, 130), style=wx.ALIGN_CENTER)
        self.textCookie = wx.TextCtrl(
            self.panel_operate, -1, pos=(200, 150), size=(80, 20),
            style=wx.TE_CENTER)
        # ID of the account to crawl.
        self.label3 = wx.StaticText(
            self.panel_operate, -1, "请输入您所要爬取微博账号的self.Id",
            pos=(160, 180), style=wx.ALIGN_CENTER)
        self.textId = wx.TextCtrl(
            self.panel_operate, -1, pos=(200, 200), size=(80, 20),
            style=wx.TE_CENTER)
        # Directory the data file is saved in.
        self.label4 = wx.StaticText(
            self.panel_operate, -1, "数据文件保存路径",
            pos=(160, 230), style=wx.ALIGN_CENTER)
        self.textFile_path = wx.TextCtrl(
            self.panel_operate, -1, pos=(200, 250), size=(80, 20),
            style=wx.TE_CENTER)
        # Without a control bound to get_cookie and a Show() call the window
        # is never displayed and the crawl can never be started.
        self.button_start = wx.Button(
            self.panel_operate, -1, "开始爬取", pos=(200, 300))
        self.Bind(wx.EVT_BUTTON, self.get_cookie, self.button_start)
        self.frame_operate.Show()

    def get_cookie(self, event):
        """Read the three input fields, set ``file_path`` and start the crawl.

        A non-numeric account ID is reported in a message box instead of
        raising ValueError inside the event handler.
        """
        global file_path
        try:
            user_id = int(self.textId.GetValue().strip())
        except ValueError:
            wx.MessageBox("账号ID必须是数字")
            return
        self.cookie = {"Cookie": self.textCookie.GetValue()}
        self.Id = user_id
        file_path = os.path.join(
            self.textFile_path.GetValue(), "%d.txt" % self.Id)
        self.Onbutton_Start()

    def Onbutton_Start(self):
        """Run the crawl, write the text file and move on to the result UI."""
        try:
            self.GetName()         # user name
            self.GetSimple_Info()  # post, following and follower counts
            self.weibo_para()      # the posts themselves
        except Exception as err:
            # GUI boundary: report the failure rather than leaving the user
            # with a window that silently does nothing.
            traceback.print_exc()
            wx.MessageBox("爬取失败,请检查cookie和账号ID: %s" % err)
            return
        self.write_txt()
        self.weibo_UI1()

    def weibo_UI1(self):
        """Tell the user the crawl finished, then open the profile window."""
        message = "文件爬取完毕"
        wx.MessageBox(message)
        self.weibo_UI2()

    def weibo_UI2(self):
        """Replace the start window with the profile summary window."""
        self.frame_operate.Destroy()
        self.frame_Info = wx.Frame(
            None, title="User_Information", size=(500, 500))
        self.panel_Info = wx.Panel(self.frame_Info, -1)
        t1 = "用户昵称:" + str(self.username)
        t2 = "微博数:" + str(self.Number)
        t3 = "粉丝数:" + str(self.fans)
        t4 = "关注数:" + str(self.Guanzhu)
        self.label16 = wx.StaticText(
            self.panel_Info, -1, self.username, pos=(200, 100),
            style=wx.ALIGN_LEFT)
        self.label5 = wx.StaticText(
            self.panel_Info, -1, t1, pos=(180, 130), style=wx.ALIGN_LEFT)
        self.label13 = wx.StaticText(
            self.panel_Info, -1, t2, pos=(180, 150), style=wx.ALIGN_LEFT)
        self.label14 = wx.StaticText(
            self.panel_Info, -1, t3, pos=(180, 170), style=wx.ALIGN_LEFT)
        self.label15 = wx.StaticText(
            self.panel_Info, -1, t4, pos=(180, 190), style=wx.ALIGN_LEFT)
        # Small font for the detail rows; font1 is the large one.
        self.font2 = wx.Font(13, wx.SCRIPT, wx.ITALIC, wx.NORMAL)
        self.label16.SetFont(self.font1)
        for label in (self.label5, self.label13, self.label14, self.label15):
            label.SetFont(self.font2)
        self.button_news = wx.Button(
            self.panel_Info, -1, "查看最近微博", pos=(220, 280))
        self.Bind(wx.EVT_BUTTON, self.weibo_UI3, self.button_news)
        self.frame_Info.Show()

    def weibo_UI3(self, event):
        """Show the newest (or pinned) post and the navigation buttons."""
        self.frame_Info.Destroy()
        self.frame_news = wx.Frame(None, title="---", size=(500, 500))
        self.panel_news = wx.Panel(self.frame_news, -1)
        label18 = wx.StaticText(
            self.panel_news, -1, "最新微博动态", pos=(200, 40))
        if self.Content:
            text1 = "最新/置顶 微博为: " + self.Content[0]
            text2 = "最新/置顶 微博发布工具: " + self.publish_tool[0]
            text3 = "最新/置顶 微博发布时间: " + self.Time[0]
            text4 = "最新/置顶 微博获得赞数: " + str(self.star[0])
            text5 = "最新/置顶 微博获得转发数: " + str(self.Zhuanfa[0])
            text6 = "最新/置顶 微博获得评论数: " + str(self.Pinglun[0])
            self.label6 = wx.TextCtrl(
                self.panel_news, -1, text1, pos=(90, 60), size=(250, 140),
                style=wx.TE_MULTILINE | wx.TE_RICH)
            self.label7 = wx.StaticText(
                self.panel_news, -1, text2, pos=(90, 200), style=wx.ALIGN_LEFT)
            self.label8 = wx.StaticText(
                self.panel_news, -1, text3, pos=(90, 220), style=wx.ALIGN_LEFT)
            self.label9 = wx.StaticText(
                self.panel_news, -1, text4, pos=(90, 240), style=wx.ALIGN_LEFT)
            self.label10 = wx.StaticText(
                self.panel_news, -1, text5, pos=(90, 260), style=wx.ALIGN_LEFT)
            self.label11 = wx.StaticText(
                self.panel_news, -1, text6, pos=(90, 280), style=wx.ALIGN_LEFT)
        # Browse the crawled posts one by one.
        self.Button_info = wx.Button(
            self.panel_news, -1, "点击查看之前的微博内容", pos=(220, 340))
        self.Bind(wx.EVT_BUTTON, self.weibo_pre_info, self.Button_info)
        # Open the chart selection window.
        self.Button_file = wx.Button(
            self.panel_news, -1, "点击查看微博数据分析图表", pos=(220, 380))
        self.Bind(wx.EVT_BUTTON, self.analysis_UI, self.Button_file)
        self.frame_news.Show()

    def analysis_UI(self, event):
        """Open the window with one button per chart."""
        self.frame_data = wx.Frame(
            None, title="data_analysis--20177830115", size=(500, 500))
        self.panel_data = wx.Panel(self.frame_data, -1)
        text1 = "2017-2018微博转发/点赞量折线统计图"
        text2 = '原创微博与转发微博统计图'
        text3 = '微博发布工具统计图'
        text4 = '微博使用心情统计图'
        self.button_1 = wx.Button(self.panel_data, -1, text1, pos=(180, 120))
        self.button_2 = wx.Button(self.panel_data, -1, text2, pos=(180, 160))
        self.button_3 = wx.Button(self.panel_data, -1, text3, pos=(180, 200))
        self.button_4 = wx.Button(self.panel_data, -1, text4, pos=(180, 240))
        self.Bind(wx.EVT_BUTTON, self.figure_1, self.button_1)
        self.Bind(wx.EVT_BUTTON, self.figure_2, self.button_2)
        self.Bind(wx.EVT_BUTTON, self.figure_3, self.button_3)
        self.Bind(wx.EVT_BUTTON, self.figure_4, self.button_4)
        self.frame_data.Show()

    def _new_analysis(self):
        """Return a data_analysis.analysis object for the current data file."""
        return data_analysis.analysis(file_path, self.Number)

    def figure_1(self, event):
        """Show the repost/like line chart."""
        self._new_analysis().analyse_Zhexian()

    def figure_2(self, event):
        """Show the original-vs-repost pie chart."""
        self._new_analysis().analyse_YC()

    def figure_3(self, event):
        """Show the publishing-tool pie chart."""
        self._new_analysis().analyse_GJ()

    def figure_4(self, event):
        """Show the emoticon usage pie chart."""
        self._new_analysis().analyse_XQ()

    def weibo_pre_info(self, event):
        """Button handler that forwards to weibo_info.

        weibo_info is also re-entered from cont() without an event object,
        so it cannot be bound to the button directly.
        """
        self.weibo_info()

    def weibo_info(self):
        """Open a window showing the post at index ``self.a``.

        When the index runs past the last crawled post a message box is
        shown and the index is reset to 0 instead of raising IndexError.
        """
        if self.a >= len(self.Content):
            wx.MessageBox("没有更多微博了")
            self.a = 0
            return
        idx = self.a
        self.s = wx.Frame(None, title="---", size=(500, 500))
        self.f = wx.Panel(self.s, -1)
        text1 = str(idx + 1) + ":" + self.Content[idx]
        text2 = "发布工具: " + self.publish_tool[idx]
        text3 = "发布时间: " + self.Time[idx]
        text4 = "点赞数: " + str(self.star[idx])
        text5 = "转发数: " + str(self.Zhuanfa[idx])
        text6 = "评论数: " + str(self.Pinglun[idx])
        self.labela = wx.TextCtrl(
            self.f, -1, text1, pos=(80, 60), size=(250, 140),
            style=wx.TE_MULTILINE | wx.TE_RICH)
        self.labelb = wx.StaticText(
            self.f, -1, text2, pos=(80, 200), style=wx.ALIGN_LEFT)
        self.labelc = wx.StaticText(
            self.f, -1, text3, pos=(80, 220), style=wx.ALIGN_LEFT)
        self.labeld = wx.StaticText(
            self.f, -1, text4, pos=(80, 240), style=wx.ALIGN_LEFT)
        self.labele = wx.StaticText(
            self.f, -1, text5, pos=(80, 260), style=wx.ALIGN_LEFT)
        self.labelf = wx.StaticText(
            self.f, -1, text6, pos=(80, 280), style=wx.ALIGN_LEFT)
        self.button_next = wx.Button(self.f, -1, "查看下一条", pos=(300, 380))
        self.button_exit = wx.Button(self.f, -1, "关闭", pos=(100, 380))
        self.Bind(wx.EVT_BUTTON, self.exit, self.button_exit)
        self.Bind(wx.EVT_BUTTON, self.cont, self.button_next)
        self.s.Show()

    def exit(self, event):
        """Close the single-post window."""
        self.s.Destroy()

    def cont(self, event):
        """Advance to the next post and reopen the single-post window."""
        self.a += 1
        self.s.Destroy()
        self.weibo_info()

具体爬虫部分,参考github某大佬的

# 获取用户昵称def GetName(self):url      = "https://weibo.cn/%d/info" % (self.Id)html     = requests.get(url, cookies=self.cookie).contentselector = etree.HTML(html)  username = selector.xpath("//title/text()")[0]self.username = username[:-3]  def GetSimple_Info(self):url      = "https://weibo.cn/u/%d?&page=1" % (self.Id)html     = requests.get(url, cookies=self.cookie).contentselector = etree.HTML(html)  # 转化为标准的HTMLpattern  = r"\d+\.?\d*"      # 微博数wb_num = selector.xpath("//div[@class='tip2']/span[@class='tc']/text()")[0]# <div class="tip2"><span class="tc">微博[1543]</span>&nbspregx   = re.findall(pattern, wb_num, re.S | re.M)  # 只要数字(字符)for value in regx:num_wb = int(value)breakself.Number = num_wb# 关注数str_gz = selector.xpath("//div[@class='tip2']/a/text()")[0]regx = re.findall(pattern, str_gz, re.M)self.Guanzhu = int(regx[0])# 粉丝数# 获取"长微博"全部文字内容def GetLong(self, weibo_link):html = requests.get(weibo_link, cookies=self.cookie).contentselector = etree.HTML(html)info = selector.xpath("//div[@class='c']")[1]wb_content = info.xpath("div/span[@class='ctt']")[0].xpath("string(.)").replace(u"\u200b", "").encode(sys.stdout.encoding, "ignore").decode(sys.stdout()return wb_content# 获取转发微博信息def GetZhuanfa(self, is_retweet, info, wb_content):original_user = is_retweet[0].xpath("a/text()")if not original_user:wb_content = u"转发微博已被删除"return wb_contentelse:original_user = original_user[0]retweet_reason = info.xpath("div")[-1].xpath("string(.)").replace(u"\u200b", "").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)retweet_reason = retweet_reason[:retweet_reason.rindex(u"赞")]wb_content = (retweet_reason + "\n" + u"原始用户: " +original_user + "\n" + u"转发内容: " + wb_content)return wb_content#一个界面展示一条微博的发布时间、点赞数、转发数、评论数def weibo_para(self):url = "https://weibo.cn/u/%d?&page=1" % (self.Id)html = requests.get(url, cookies=self.cookie).contentselector = etree.HTML(html)if selector.xpath("//input[@name='mp']") == []:page_num = 1else:page_num = 
(int)(selector.xpath("//input[@name='mp']")[0].attrib["value"])pattern = r"\d+\.?\d*"for page in range(1, page_num + 1):url2 = "https://weibo.cn/u/%d?&page=%d" % (self.Id, page)html2 = requests.get(url2, cookies=self.cookie).contentselector2 = etree.HTML(html2)info = selector2.xpath("//div[@class='c']")is_empty = info[0].xpath("div/span[@class='ctt']")if is_empty:for i in range(0, len(info) - 2):# 微博内容str_t = info[i].xpath("div/span[@class='ctt']")Content = str_t[0].xpath("string(.)").replace(u"\u200b", "").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)Content = Content[:-1]weibo_Id = info[i].xpath("@id")[0][2:]a_link = info[i].xpath("div/span[@class='ctt']/a")is_retweet = info[i].xpath("div/span[@class='cmt']")if a_link:if a_link[-1].xpath("text()")[0] == u"全文":if not is_retweet:wb_content = wb_content[1:]Content = wb_contentif is_retweet:Content = self.GetZhuanfa(is_retweet, info[i], Content)self.Content.append(Content)# 微博发布时间str_time = info[i].xpath("div/span[@class='ct']")str_time = str_time[0].xpath("string(.)").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)Time = str_time.split(u'来自')[0]if u"刚刚" in Time:Time = datetime.now().strftime('%Y-%m-%d %H:%M')elif u"分钟" in Time:minute = Time[:Time.find(u"分钟")]minute = timedelta(minutes=int(minute))Time = (datetime.now() - minute).strftime("%Y-%m-%d %H:%M")elif u"今天" in Time:today = datetime.now().strftime("%Y-%m-%d")time = Time[3:]Time = today + " " + timeelif u"月" in Time:year = datetime.now().strftime("%Y")month = Time[0:2]day = Time[3:5]time = Time[7:12]Time = (year + "-" + month + "-" + day + " " + time)else:Time = Time[:16]self.Time.append(Time)str_footer = info[i].xpath("div")[-1]str_footer = str_footer.xpath("string(.)").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)# 微博发布工具if len(str_time.split(u'来自')) > 1:publish_tool = str_time.split(u'来自')[1]else:publish_tool = u"无"self.publish_tool.append(publish_tool)str_footer = info[i].xpath("div")[-1]str_footer 
= str_footer.xpath("string(.)").encode(sys.stdout.encoding, "ignore").decode(sys.stdout.encoding)str_footer = str_footer[str_footer.rfind(u'赞'):]guid = re.findall(pattern, str_footer, re.M)    # 点赞数star = int(regx[0])self.star.append(star)# 转发数Zhuanfa = int(regx[1])self.Zhuanfa.append(Zhuanfa)# 评论数Pinglun = int(regx[2])self.Pinglun.append(Pinglun)self.number1 += 1def write_txt(self):try:contents_header = u"\n\n微博内容: \n"contents = (u"用户信息\n用户昵称:" + self.username +u"\n用户Id: " + str(self.Id) +u"\n微博数: " + str(self.Number) +u"\n关注数: " + str(self.Guanzhu) +u"\n粉丝数: " + str(self.fans) + contents_header + '\n')for i in range(1, self.number1 + 1):text = (str(i) + ":" + self.Content[i - 1] + "\n" +u"发布工具: " + self.publish_tool[i - 1] + "\n" +u"发布时间: " + self.Time[i - 1] + "\n" +u"点赞数: " + str(self.star[i - 1]) +u"转发数: " + str(self.Zhuanfa[i - 1]) +u"评论数: " + str(self.Pinglun[i - 1]) + "\n\n")contents = contents + textglobal file_pathf = open(file_path, "wb")f.write(contents.encode(sys.stdout.encoding))f.close()except Exception as e:print("Error: ", e)traceback.print_exc()

测试函数部分

def main():
    """Create the application, build its first window and run the event loop."""
    app = Wb()
    app.Operate()
    app.MainLoop()


if __name__ == "__main__":
    main()

数据分析

import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from matplotlib import font_manager as fm
import time
from datetime import datetime
import webbrowser
from collections import Counter


class analysis(object):
    """Charts built from the text file written by the crawler.

    Each ``analyse_*`` method reads ``file_name`` as UTF-8, extracts its
    figures with regular expressions and shows one matplotlib chart.
    """

    def __init__(self, file_name, number):
        """Store the data file path and the account's total post count."""
        self.file_name = file_name
        self.number = number
        self.X_data = []   # sampled publish times (x axis)
        self.Y1_data = []  # sampled like counts, in thousands
        self.Y_data = []   # sampled repost counts, in thousands
        self.str = ""

    def _read_text(self):
        """Return the whole data file as one string."""
        with open(self.file_name, encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def _sample_indices(count, fraction=0.18):
        """Return evenly spaced indices covering about *fraction* of *count*.

        At least one index is returned for a non-empty series, which avoids
        the division by zero small inputs used to cause.
        """
        if count <= 0:
            return []
        groups = max(1, int(fraction * count))
        step = count // groups
        return [i * step for i in range(groups)]

    @staticmethod
    def _count_tools(text, min_count=10):
        """Count publishing tools, keeping those used more than *min_count* times."""
        tools = Counter(re.findall(r'发布工具: (.*)\n发布时间', text))
        return {name: n for name, n in tools.items() if n > min_count}

    @staticmethod
    def _count_emoticons(text, min_count=2):
        """Count every ``[xx]`` emoticon, keeping those used more than *min_count* times.

        Each bracketed token of 1-4 characters is counted, including the
        only emoticon on a line and those in the middle of a line.
        """
        faces = Counter(re.findall(r'\[([^\[\]\n]{1,4})\]', text))
        return {name: n for name, n in faces.items() if n > min_count}

    def _retweet_split(self, text):
        """Return ``(reposts, originals)``, with originals clamped at 0."""
        reposts = len(re.findall(r'转发理由', text))
        return reposts, max(0, self.number - reposts)

    def analyse_Zhexian(self):
        """Show the line chart of repost and like counts over time.

        Posts are used up to (not including) the first one whose time
        contains "2016"; if there is none, all posts are used.
        """
        number_re = re.compile(r'\d+')
        text = self._read_text()
        reposts = [int(x) for x in number_re.findall(
            ''.join(re.findall(r'转发数: \d+', text)))]
        likes = [int(x) for x in number_re.findall(
            ''.join(re.findall(r'点赞数: \d+', text)))]
        stamps = re.findall(
            r'发布时间: (\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})', text)

        stop = next(
            (i for i, stamp in enumerate(stamps) if "2016" in stamp),
            len(stamps))
        stop = min(stop, len(reposts), len(likes))
        # Divide by 1000 so the axis shows thousands.
        reposts = [value / 1000 for value in reposts[:stop]]
        likes = [value / 1000 for value in likes[:stop]]
        times = [datetime.strptime(s, "%Y-%m-%d %H:%M") for s in stamps[:stop]]

        # Thin the series out so the x axis stays readable.
        picked = self._sample_indices(len(reposts))
        self.X_data = [times[i] for i in picked]
        self.Y_data = [reposts[i] for i in picked]
        self.Y1_data = [likes[i] for i in picked]

        fig1 = plt.figure(figsize=(8, 5))
        plt.rcParams['font.sans-serif'] = ['SimHei']
        ax1 = fig1.add_subplot(1, 1, 1)
        ax1.xaxis.set_major_formatter(mdate.DateFormatter('%Y-%m-%d %H-%M'))
        plt.xticks(self.X_data, rotation=90)
        plt.yticks(np.linspace(0, 5000, 5, endpoint=True))
        plt.title(u"2017-2018微博转发/点赞量折线图", color="black")
        plt.plot(self.X_data, self.Y_data, "o-", color='skyblue',
                 label="转发量", markersize=1.5)
        plt.plot(self.X_data, self.Y1_data, "o-", color='pink',
                 label="点赞量", markersize=1.5)
        plt.xlabel("发布时间")
        plt.ylabel("数量(千/条)")
        plt.legend()
        plt.show()

    def analyse_YC(self):
        """Show the pie chart of reposted versus original posts."""
        reposts, originals = self._retweet_split(self._read_text())
        plt.rcParams['font.sans-serif'] = ['SimHei']
        labels = ['转发微博', '原创微博']
        sizes = [reposts, originals]
        explode = (0.1, 0)
        plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                shadow=False, startangle=150)
        plt.title(u"原创与转发微博量", color="black")
        plt.show()

    def analyse_GJ(self):
        """Show the pie chart of publishing tools used more than 10 times."""
        gongju = self._count_tools(self._read_text())
        labels = list(gongju.keys())
        sizes = list(gongju.values())
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True,
                startangle=150)
        plt.title(u"微博发布工具统计", color="black")
        plt.show()

    def analyse_XQ(self):
        """Show the pie chart of emoticons used more than twice."""
        biaoqing = self._count_emoticons(self._read_text())
        labels = list(biaoqing.keys())
        sizes = list(biaoqing.values())
        # Select the CJK-capable font before any text is created.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        fig1, ax1 = plt.subplots()
        patches, texts, autotexts = ax1.pie(
            sizes, labels=labels, autopct='%1.0f%%', shadow=False,
            startangle=170)
        ax1.axis('equal')
        # Shrink the label fonts.
        proptease = fm.FontProperties()
        proptease.set_size('small')
        plt.title(u"微博表情使用次数", color="black")
        plt.setp(autotexts, fontproperties=proptease)
        plt.setp(texts, fontproperties=proptease)
        plt.show()

python程序打包

#在cmd下安装pyinstaller
pip install pyinstaller
#打包成一个可执行文件 -F (注意将cmd窗口切换至文件保存的路径下)
pyinstaller -F filename.py

本篇只适合新手简单学习,笔者也刚学,加上正值复习周,后期会逐渐完善,毕竟UI写得太丑了!
另:关于获取本地用户cookie和微博账号id的操作比较简单,在此不再做详细解释。如果程序跑不出来,相信我,一定是cookie问题。

仅供作业参考,抄袭需谨慎

微博爬虫及简单数据分析相关推荐

  1. python大作业爬虫_Python大作业---微博爬虫及简单数据分析

    刚开始学python,选了这个题目,把代码放上来留念,没有用到很流行的框架,所以代码量挺大 GUI用wxpython写的 # _*_ coding: UTF-8 _*_ import os impor ...

  2. 自己编写一个简单的微博爬虫

    自己编写一个简单的微博爬虫 很多做社交媒体数据分析的同学需要采集一些新浪微博上的数据,新浪微博虽然有提供api,但免费的api对获取的数据项和获取的频率都有很大的限制,商业版api据说限制较少,但是作 ...

  3. 微博爬虫数据分析可视化程序设计报告

    文章目录 1 需求分析 1.1 引言 1.2 功能需求 1.3运行需求 2 详细设计 2.1界面设计 2.2 程序设计 2.3 容错性 3 总结 具体代码 analysis.py keywords_n ...

  4. 用户、话题、评论一网打尽,分享一个最强微博爬虫

    实现的功能 微博向来是一个极好的吃瓜圣地,为了获取微博上行行色色的数据,微博相关的爬虫也是层出不穷,因为无论是运营者还是数据分析从业者都或多或少需要微博数据,我的许多朋友也不例外,经过断断续续的努力, ...

  5. 分分钟百万条数据的微博爬虫分析

    微博爬虫系列2-分析微博接口 大家好,我是W 经过上一篇的分析我们无意中发现了微博的接口,并且不做任何的账号.流量限制可以直接获取最完整的微博数据.接下来我们就通过分析微博粉丝接口查看其中的数据结构顺 ...

  6. python3爬虫及数据分析_Python3爬虫及可视化数据分析系列图文教程——大纲目录...

    本文首发于:Python3爬虫及可视化数据分析系列图文教程--大纲目录 - Python量化投资​www.lizenghai.com 写在最前 最近更新:2019-03-28 本教程所有源码见文尾. ...

  7. 【网络爬虫】【java】微博爬虫(一):小试牛刀——网易微博爬虫(自定义关键字爬取微博数据)(附软件源码)...

    一.写在前面 (本专栏分为"java版微博爬虫"和"python版网络爬虫"两个项目,系列里所有文章将基于这两个项目讲解,项目完整源码已经整理到我的Github ...

  8. 微博爬虫“免登录”技巧详解及Java实现

    本文源地址:http://www.fullstackyang.com/...,转发请注明该地址或segmentfault地址,谢谢! 一.微博一定要登录才能抓取? 目前,对于微博的爬虫,大部分是基于模 ...

  9. 一步一步学python爬虫_初学Python之爬虫的简单入门

    初学Python之爬虫的简单入门 一.什么是爬虫? 1.简单介绍爬虫 爬虫的全称为网络爬虫,简称爬虫,别名有网络机器人,网络蜘蛛等等. 网络爬虫是一种自动获取网页内容的程序,为搜索引擎提供了重要的数据 ...

最新文章

  1. G1的Region是如何划分数量和大小的?
  2. 【技术实验】表格存储Tablestore准实时同步数据到Elasticsearch
  3. 魔改GPT自动写网文,速度一秒十字,还能给太监作品无限续更 | 开源
  4. 深度学习赋能视频编码
  5. python案例源码_【python】python实例集一
  6. 66-Flutter移动电商实战-会员中心_编写ListTile的通用方法
  7. Vuex状态管理方式
  8. XP下,文件夹添加右键命令行
  9. Linux 下查看线程信息
  10. 京东发布FastReID:目前最强悍的目标重识别开源库!
  11. android 音频配置文件,Android音频系统
  12. 基于JAVA+SpringBoot+Mybatis+MYSQL的贷款审批系统
  13. [Luogu] 模板题-最近公共祖先
  14. Linux服务器的常用备份方法
  15. Office组件无法正常使用的解决方法
  16. 如何防止盗号 使用windows自带的 屏幕键盘 OSK
  17. 数据结构课程设计(二)---算术表达式求值
  18. pat basic 1082 射击比赛
  19. Go语言编程设计学习Day1:helloworld 变量 常量
  20. 云计算基础之如何学习云计算?

热门文章

  1. python 期货现货差价监测_大宗商品现货数据不好拿?商品季节性难跟踪?Python爬虫一键解决没烦恼...
  2. 外卖返利系统外卖返利公众号外卖返利源码
  3. echarts 百度地图统计分布图,地图可视化统计
  4. st3搭建python开发环境
  5. 读书笔记:《学会提问》
  6. Python 计时器(秒钟、秒表)
  7. Bluetooth sco协议录音
  8. Android设备虚拟摄像头技术实现
  9. 基于Proteus学习单片机系列(七)——实时时钟DS1302
  10. http-parser用法