微博热搜数据变化趋势视频化展示
前一篇,说了如何采集微博热搜数据并存储,这篇就讲讲如何将采集到的热搜数据视频化。新年新气象,更新一波。PS:这号鸽到都年更了(`・ω・´)
基本想法就是根据每条热搜的热度数值确定一个椭圆的大小和颜色,把同一时刻的所有热搜词条画在一张图里,再把不同时刻的热搜图片按时间顺序连成视频,这样就可以直观地看到热搜的变化趋势,最后添加BGM就成了。
Talk is cheap, show me the code
# -*- coding: utf-8 -*-
import datetime
import math
import random
import sys
import time

import cv2
import numpy as np
import pandas as pd
import pygame  # pygame draws every frame
from PIL import ImageGrab  # screen capture supplies the video frames
from pygame.locals import *
from tqdm import tqdm
import moviepy.editor as mpe
import My_Email as my


class WeiboHotnessVideo:
    """Turn one day of stored Weibo hot-search data into a video with BGM.

    Every hot-search title is drawn as an ellipse whose colour and size are
    derived from its hotness value. One picture is drawn per interpolated
    30-second step, captured from the screen, and appended to an AVI file;
    afterwards a background-music track is attached and an MP4 is exported.

    Constructing an instance runs the whole pipeline immediately.
    """

    # At most this many (title, hotness) pairs are read from each CSV row.
    _TITLES_PER_SNAPSHOT = 50

    def __init__(self, date):
        """Store the configuration and run the full render pipeline.

        Parameters
        ----------
        date : str
            Day to render, formatted ``YYYYMMDD`` (e.g. ``'20200202'``).

        Returns
        -------
        None.
        """
        self.folder_path = r'XXXXXXXXXXXX\weibo_hotness'
        self.date = date  # day being rendered
        next_day = (datetime.date(int(date[:4]), int(date[4:6]), int(date[-2:]))
                    + datetime.timedelta(days=1))
        self.next_date = next_day.strftime('%Y%m%d')  # following day
        self.starttime = '0430'  # HHMM at which a "day" starts and ends
        self.frame_number = 10  # frames between two data columns
        self.fps = 60  # pygame frame rate
        self.video_fps = 60  # frame rate of the generated video
        self.background_color = (255, 255, 255)  # video background colour
        self.hotness_video()  # render the data as a video
        self.add_BGM()  # attach the background music

    def hotness_video(self):
        """Draw every interpolated frame with pygame and record it to an AVI.

        The frame table comes from ``hotness_data_settle``. After it has been
        loaded, ``self.folder_path`` is re-pointed at the video working
        folder, which is where the font, the intro picture and the output
        ``VIDEO`` directory are looked up. Frames are captured with a
        full-screen grab, so the video has the size of the current screen.

        Returns
        -------
        None.
        """
        all_frame_data = self.hotness_data_settle()
        # From here on all assets and outputs live in the video folder.
        self.folder_path = r'D:\weibo_hotness_video'

        # The recording is a screen grab, so the canvas matches the screen.
        self.screen_width, self.screen_height = ImageGrab.grab().size
        self.screen_ratio = self.screen_height / self.screen_width
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        # Output is <folder>\VIDEO\<date>.avi at ``self.video_fps``.
        video = cv2.VideoWriter(
            self.folder_path + r'\VIDEO\%s.avi' % self.date,
            fourcc, self.video_fps,
            (self.screen_width, self.screen_height))

        pygame.init()
        try:
            fps_clock = pygame.time.Clock()
            screen = pygame.display.set_mode(
                (self.screen_width, self.screen_height))
            self._render_intro(screen, video, fps_clock)

            title_count = len(all_frame_data)
            previous_frame_data = None
            font_cache = {}  # font size -> pygame Font, avoids reloading
            print('开始循环跌打绘制每帧数据!!!')
            for frame in tqdm(all_frame_data.columns):
                for event in pygame.event.get():
                    if event.type == QUIT:
                        # ``finally`` below releases the writer and pygame.
                        sys.exit()
                screen.fill(self.background_color)

                # Ascending order: the hottest titles are drawn last, on top.
                current_frame_data = all_frame_data[[frame]].sort_values(
                    by=frame, ascending=True)
                # Column names are YYYYmmddHHMMSS; the label drops seconds.
                stamp = frame[:-2]
                frame_time = (stamp[:4] + '/' + stamp[4:6] + '/' + stamp[6:8]
                              + ' ' + stamp[8:10] + ':' + stamp[-2:])

                # Layout (position and size) of the ellipses in this frame.
                visible = current_frame_data[current_frame_data[frame] > 0]
                layout = pd.DataFrame(
                    index=visible.index,
                    columns=['pos_x', 'pos_y', 'size_x', 'size_y'])

                # Default so the timestamp label can still be drawn when the
                # frame contains no ellipse at all.
                text_color = (0, 0, 0)
                for i in range(title_count):
                    text = current_frame_data.index[i]
                    hotness_number = current_frame_data.iloc[i, 0]
                    if hotness_number == 0:
                        continue
                    color, text_color, size_x = self._ellipse_style(
                        hotness_number)
                    size_y = round(size_x * self.screen_ratio, 2)

                    if (previous_frame_data is not None
                            and text in previous_frame_data.index):
                        # A title already on screen keeps its position.
                        pos_x = previous_frame_data.at[text, 'pos_x']
                        pos_y = previous_frame_data.at[text, 'pos_y']
                    else:
                        # First frame: avoid ellipses placed in this frame;
                        # later frames: avoid those of the previous frame.
                        occupied = (layout if previous_frame_data is None
                                    else previous_frame_data)
                        pos_x, pos_y = self.calculate_localtion(
                            occupied, 0.8, size_x, size_y)
                    layout.loc[text] = [pos_x, pos_y, size_x, size_y]

                    bounds = [pos_x - 0.5 * size_x, pos_y - 0.5 * size_y,
                              size_x, size_y]
                    pygame.draw.ellipse(screen, color, bounds)
                    pygame.draw.ellipse(screen, (0, 0, 0), bounds, 3)

                    title = str(text)
                    if not title:
                        continue  # nothing to render, avoids dividing by 0
                    parts = self._split_title(title)
                    text_size = self._title_font_size(title, parts, size_x)
                    self._draw_title(
                        screen, self._get_font(font_cache, text_size),
                        parts, text_color, pos_x, pos_y)

                # Timestamp label in the bottom-left corner. Its colours
                # follow the text colour of the last ellipse drawn.
                if text_color == (0, 0, 0):
                    bcolor = (255, 255, 255)
                else:
                    bcolor = (0, 0, 0)
                label = self._get_font(font_cache, 40).render(
                    frame_time, True, text_color, bcolor)
                screen.blit(label, (4, self.screen_height - 44))

                previous_frame_data = layout
                pygame.display.update()
                self._capture_frame(video)
                fps_clock.tick(self.fps)
        finally:
            # Always finalise the AVI and close the window, including on
            # the QUIT path and on errors.
            video.release()
            pygame.quit()

    def _render_intro(self, screen, video, fps_clock):
        """Record the 1.5-second intro: the date title wiped in over a picture."""
        movie_start_text = '%s年%s月%s日微博热搜动态' % (
            self.date[:4], self.date[4:6], self.date[-2:])
        background = pygame.image.load(self.folder_path + r'\movie_start.jpg')
        title_font = pygame.font.Font(
            self.folder_path + r'\STXINGKA.TTF', 120)
        text_image = title_font.render(movie_start_text, True, (255, 153, 0))
        text_size_x, text_size_y = text_image.get_size()
        # Centre the title on the screen.
        pos_x = 0.5 * (self.screen_width - text_size_x)
        pos_y = 0.5 * (self.screen_height - text_size_y)
        for i in range(int(1.5 * self.video_fps)):
            screen.blit(background, (0, 0))
            if i < self.video_fps:
                # Reveal the text left to right during the first second.
                visible_width = text_size_x * i / self.video_fps
            else:
                visible_width = text_size_x
            visible_area = pygame.Rect(0, 0, visible_width, text_size_y)
            screen.blit(text_image, (pos_x, pos_y), visible_area)
            pygame.display.update()
            self._capture_frame(video)
            fps_clock.tick(self.fps)

    @staticmethod
    def _capture_frame(video):
        """Grab the screen and append it to ``video`` in OpenCV's BGR order."""
        shot = ImageGrab.grab()
        video.write(cv2.cvtColor(np.array(shot), cv2.COLOR_RGB2BGR))

    def _get_font(self, cache, size):
        """Return the title font at ``size``, loading it at most once per run."""
        if size not in cache:
            cache[size] = pygame.font.Font(
                self.folder_path + r'\STXINGKA.TTF', size)
        return cache[size]

    def _ellipse_style(self, hotness_number):
        """Map a hotness value to ``(fill colour, text colour, ellipse width)``.

        The fill runs from green through orange and red to black as hotness
        grows; the two hottest bands use white text. Width is in pixels and
        scales with the screen width.
        """
        black, white = (0, 0, 0), (255, 255, 255)
        if hotness_number < 1E4:
            return (0, 128, 0), black, 40
        if hotness_number < 2E5:
            red = int(round(((hotness_number - 1E4) / 1.9E5) * 128))
            color = (red, 128, 0)
        elif hotness_number < 1E6:
            red = 127 + int(round(((hotness_number - 2E5) / 8E5) * 128))
            color = (red, 128, 0)
        elif hotness_number < 5E6:
            green = 128 - int(round(((hotness_number - 1E6) / 4E6) * 128))
            color = (255, green, 0)
        elif hotness_number < 1.5E7:
            red = 255 - int(round(((hotness_number - 5E6) / 1E7) * 128))
            color = (red, 0, 0)
        elif hotness_number < 2E8:
            red = 128 - int(round(((hotness_number - 1.5E7) / 1.85E8) * 128))
            size_x = round(
                (0.775 + math.sqrt(hotness_number / 2E8)) * self.screen_width,
                2)
            return (red, 0, 0), white, size_x
        else:
            return black, white, round(2 * self.screen_width, 2)
        size_x = round(
            math.sqrt(hotness_number / 25000000) * self.screen_width, 2)
        return color, black, size_x

    @staticmethod
    def _split_title(text):
        """Split a title into ``(top, center, bottom)`` display lines.

        Titles shorter than 7 characters stay on one line (``top`` and
        ``bottom`` are ``None``); longer ones put about 3/7 of the characters
        on the centre line and share the rest between top and bottom.
        """
        if len(text) < 7:
            return None, text, None
        split = int(math.ceil(len(text) * 3 / 7))  # centre-line length
        # Floor division reproduces the original's effective rounding.
        first = (len(text) - split) // 2
        return text[:first], text[first:first + split], text[first + split:]

    @staticmethod
    def _title_font_size(text, parts, size_x):
        """Pick a font size so the centre line fits an ellipse ``size_x`` wide."""
        text_top, text_center, _ = parts
        if text_top is None:
            text_size = int(round(size_x / len(text)))
        elif len(text) < 9:
            text_size = int(round(0.6 * size_x / len(text_center)))
        elif len(text) < 11:
            text_size = int(round(0.7 * size_x / len(text_center)))
        else:
            text_size = int(round(size_x / len(text_center)))
        return max(1, text_size)  # never request a zero-sized font

    @staticmethod
    def _draw_title(screen, font, parts, text_color, pos_x, pos_y):
        """Blit the title lines horizontally centred on ``(pos_x, pos_y)``."""
        text_top, text_center, text_bottom = parts
        # Vertical offsets are in units of each rendered line's height.
        for line, offset in ((text_top, -1.3), (text_bottom, 0.3),
                             (text_center, -0.5)):
            if line is None:
                continue
            image = font.render(line, True, text_color)
            width, height = image.get_size()
            screen.blit(image, (pos_x - 0.5 * width, pos_y + offset * height))

    def hotness_data_settle(self):
        """Load two days of snapshots and interpolate them into frames.

        Reads ``<folder>\\data\\<date>.csv`` and the next day's file, keeps
        the rows from ``self.starttime`` on the first day up to
        ``self.starttime`` on the next, and inserts one frame per 30 seconds
        between consecutive snapshots. A title present in both snapshots is
        interpolated linearly; one that disappears fades to zero over the
        first half of the interval; one that appears fades in over the
        second half.

        Returns
        -------
        DataFrame
            One row per title and one column per frame (named
            ``YYYYmmddHHMMSS``); missing values are 0.
        """
        try:
            data_current = pd.read_csv(
                self.folder_path + r'\data\%s.csv' % self.date, index_col=0)
            data_next = pd.read_csv(
                self.folder_path + r'\data\%s.csv' % self.next_date,
                index_col=0)
        except (OSError, ValueError) as e:
            # Missing or unparsable input: report it and exit with failure.
            print(e)
            sys.exit(1)

        # Keep only the window [starttime today, starttime tomorrow].
        start = int(self.starttime)
        data_current = data_current.loc[data_current.index >= start]
        data_next = data_next.loc[data_next.index <= start]
        # Normalise the HHMM index into full YYYYmmddHHMM strings.
        data_current.index = [self.date + str(x).zfill(4)
                              for x in data_current.index]
        data_next.index = [self.next_date + str(x).zfill(4)
                           for x in data_next.index]
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        all_data = pd.concat([data_current, data_next])

        segments = []
        print('开始进行数据整理合并!!!')
        for i in tqdm(range(1, len(all_data))):
            time_early = datetime.datetime.strptime(
                str(all_data.index[i - 1]), '%Y%m%d%H%M')
            time_latter = datetime.datetime.strptime(
                str(all_data.index[i]), '%Y%m%d%H%M')
            frame_count = int(
                (time_latter - time_early).total_seconds() / 30)
            if frame_count <= 0:
                continue  # duplicate or unordered snapshot: nothing to add
            time_list = [
                (time_early + datetime.timedelta(seconds=30 * x)).strftime(
                    '%Y%m%d%H%M%S')
                for x in range(frame_count)]
            segments.append(self._interpolate_segment(
                list(all_data.iloc[i - 1]), list(all_data.iloc[i]),
                time_list))

        if not segments:
            return pd.DataFrame()
        # One outer join at the end instead of re-concatenating per segment.
        all_frame_data = pd.concat(segments, axis=1, join='outer', sort=False)
        return all_frame_data.fillna(0)

    @classmethod
    def _interpolate_segment(cls, data_early, data_latter, time_list):
        """Interpolate hotness between two snapshots, one value per frame.

        ``data_early`` and ``data_latter`` are flat rows laid out as
        ``[title, hotness, title, hotness, ...]``.
        """
        frame_count = len(time_list)
        middle_point = int(0.5 * frame_count) + 1  # end of the fade-out part

        latter_hotness = {}
        for j in range(min(cls._TITLES_PER_SNAPSHOT, len(data_latter) // 2)):
            # First occurrence wins, like list.index() in a flat search.
            latter_hotness.setdefault(data_latter[2 * j],
                                      data_latter[2 * j + 1])

        rows = {}
        for j in range(min(cls._TITLES_PER_SNAPSHOT, len(data_early) // 2)):
            text_early = data_early[2 * j]
            hotness_early = data_early[2 * j + 1]
            if text_early in latter_hotness:
                # Present in both snapshots: linear interpolation.
                increment = ((latter_hotness[text_early] - hotness_early)
                             / frame_count)
                rows[text_early] = [hotness_early + x * increment
                                    for x in range(frame_count)]
            else:
                # Gone in the later snapshot: fade out (10, 8, 6, 4, 2, 0),
                # then stay at zero. Scaling by (1 - 2x/n) lands exactly on
                # 0 at the midpoint of an even frame count.
                head = [hotness_early * (1 - 2 * x / frame_count)
                        for x in range(middle_point)]
                rows[text_early] = head + [0] * (frame_count - middle_point)

        for text_latter, hotness_latter in latter_hotness.items():
            if text_latter in rows:
                continue
            # New in the later snapshot: zero first, then fade in.
            increment = 2 * hotness_latter / frame_count
            tail = [x * increment
                    for x in range(1, frame_count - middle_point + 1)]
            rows[text_latter] = [0] * middle_point + tail

        return pd.DataFrame.from_dict(rows, orient='index', columns=time_list)

    def calculate_localtion(self, data, space_level, size_x, size_y):
        """Pick a random centre for a new ellipse, avoiding existing ones.

        Up to 10 random candidates inside the central 80% of the screen are
        tried. A candidate is rejected when both its x and y distance to an
        existing ellipse are below ``space_level`` times the summed sizes.

        Parameters
        ----------
        data : DataFrame
            Existing ellipses with columns ``pos_x``, ``pos_y``, ``size_x``
            and ``size_y``; rows holding NaN never cause a rejection.
        space_level : float
            Spacing factor applied to the summed sizes.
        size_x : float
            Width of the new ellipse.
        size_y : float
            Height of the new ellipse.

        Returns
        -------
        pos_x : float
            x coordinate of the new ellipse's centre.
        pos_y : float
            y coordinate of the new ellipse's centre.
            If every attempt collides, the last candidate is returned.
        """
        for _ in range(10):
            pos_x = round(random.uniform(self.screen_width * 0.1,
                                         self.screen_width * 0.9), 2)
            pos_y = round(random.uniform(self.screen_height * 0.1,
                                         self.screen_height * 0.9), 2)
            too_close = any(
                abs(pos_x - other_x) < space_level * abs(other_w + size_x)
                and abs(pos_y - other_y) < space_level * abs(other_h + size_y)
                for other_x, other_y, other_w, other_h in zip(
                    data['pos_x'], data['pos_y'],
                    data['size_x'], data['size_y']))
            if not too_close:
                return pos_x, pos_y
        return pos_x, pos_y  # no free spot found: fall back to the last try

    def add_BGM(self):
        """Attach the BGM to the AVI made by ``hotness_video`` and export MP4.

        The audio is cut to the video's duration and the result is written
        to ``<folder>\\VIDEO\\<date>X.mp4``.

        Returns
        -------
        None.
        """
        video_path = self.folder_path + r'\VIDEO\%s.avi' % self.date
        BGM_path = self.folder_path + r'\BGM\专题片纪录片常用流行音乐-大气恢弘有气质-公司介绍(Corp_爱给网_aigei_com.mp3'
        export_video_path = self.folder_path + r'\VIDEO\%sX.mp4' % self.date

        video = mpe.VideoFileClip(video_path)
        try:
            bgm = mpe.AudioFileClip(BGM_path)
            try:
                audio_clip = bgm.set_end(video.duration)
                video.set_audio(audio_clip).write_videofile(export_video_path)
            finally:
                bgm.close()  # release the audio reader
        finally:
            video.close()  # release the video reader


if __name__ == '__main__':
    yesterday = datetime.datetime.today() + datetime.timedelta(-1)
    yesterday = yesterday.strftime('%Y%m%d')
    video = WeiboHotnessVideo(yesterday)
这样就完成了所有的工作,生成并保存了微博热搜数据随时间的变化趋势视频。
2021年12月30日微博热搜
好的,本期内容就是这样,感谢大家的关注,再见!!!
微博热搜数据变化趋势视频化展示相关推荐
- Python爬取微博热搜数据之炫酷可视化
可视化展示 看完记得点个赞哟 微博炫酷可视化音乐组合版来了! 项目介绍 背景 现阶段,微博.抖音.快手.哗哩哗哩.微信公众号已经成为不少年轻人必备的"生活神器".在21世纪的今天, ...
- python爬取微博热搜数据并保存!
主要用到requests和bf4两个库将获得的信息保存在d://hotsearch.txt下importrequests;importbs4mylist=[]r=requests.get(ur- 很多 ...
- 【Python】爬虫获取微博热搜数据,response中文显示“\u7814\u7a76\u8bc1\u5b9e\u”
问题描述 在爬虫获取微博热搜数据的时候,response中文出现了不便于理解的字段,截取如下: ......[{"title_sub":"\u7814\u7a76\u8b ...
- 【Python】我用python爬取一月份微博热搜数据来分析人们对新型肺炎的关注程度变化
2020年1月23日,睡醒一觉,发现新型肺炎的影响正在以肉眼可见的速度扩散,已经放假的我只能宅在家里,不敢随便外出.实在闲得无聊,我便拿起了技术人的工具,利用python,用数据来简单分析一波新型肺炎 ...
- 15--jQuery插件大全-- 使用jsoup爬取酷我音乐和微博热搜数据
目录 歌曲来自于酷我音乐热歌榜 热搜数据来自于微博热搜 库我音乐前台代码如下: 库我音乐后台代码如下: Servlet代码: model代码: HTTPUtils工具类 微博热搜前台代码如下: 微博热 ...
- 用Python爬取微博热搜数据
废话少说,直接上代码,然后再详细介绍构造过程. 代码 import urllib.request import re import pandas as pd import time import ra ...
- 通过Python分析2020年全年微博热搜数据
本文内容 热搜抓取 热搜分析 全年热搜热度分析 全年热搜情感分析 全年热搜词云分析 全年热搜人物分析 本文小结 授人以鱼,不如授人以渔.提供代码,并非每一个技术写作者的义务.我懒得搞关注公众号.发送关 ...
- java实现微博热搜榜_微博热搜数据监测
请选择开发语言: PHP Python $api_url = 'https://登录后显示/api/68/177'; $appid = '应用id';// 在后台我的应用查看; $secret = ' ...
- 硬核吃瓜!上万条数据撕开微博热搜真相
作者 | 徐麟 来源 | 转载自数据森麟(ID:shujusenlin) 吃瓜前言 关于新浪微博,向来都是各路吃瓜群众聚集之地,大家在微博中可以尽情吃瓜,各种类型的瓜应有尽有,只有你想不到的,没有你吃 ...
- 上万条数据撕开微博热搜的真相!
戳蓝字"CSDN云计算"关注我们哦! 作者 | 徐麟,某互联网公司数据分析狮 来源 | 数据森麟(id:shujusenlin) 吃瓜前言 关于新浪微博,向来都是各路吃瓜群众聚集之 ...
最新文章
- Spring MVC文件上传
- kdj超卖_一个判断股票超买超卖现象的指标——KDJ,简单明了,准确且省心
- MySQL初始化脚本mysql_install_db使用简介及选项参数
- SAP Fiori Elements - bindComponent - binding property in XML view will trigger odata request
- svn服务器搭建-SuSE Linux Enterprise Server 11 SP3
- LuckyDraw app使用CosmosDB的成本分析
- 流媒体数据代理----Anychat
- fir.im同款企业级APP分发平台系统源码
- 容器编排技术 -- Kubernetes DNS Pod 与 Service 介绍
- IOS应用程序发布到苹果APP STORE完整流程使用教程
- python语句写入oracle_将Python变量插入Oracle数据库
- selenium chromedriver usage
- TimeBake:part1
- **带I2C的LCD1602液晶显示51单片机程序**
- 小尺寸2.4G SMD贴片天线方案 CA-C03 CrossAir贴片天线
- 数据库: mongodb导入json数据
- 工程伦理第五章习题答案
- 模仿QQ的左右滑动切换界面和下拉更新的效果
- 乐视网复牌即跌停,仍有千万买单甘作“接盘侠”
- UE4热更新:需求分析与方案设计