目的

对此前爬取并保存到本地的天猫商品数据(关键词:智能手机)进行统计分析,并将结果可视化。


数据分析


一:商家地区分布图

读取本地数据源,提取其中的省份信息,绘制商家地区分布图,并以 HTML 格式保存到本地。
用浏览器打开该文件后,将鼠标移动到某个省份上,即可动态显示该地区的商家数量。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Draw an interactive map showing how many shops each province has.

__title__ = ''
__author__ = 'jia666666'
"""
from typing import TYPE_CHECKING

import pandas as pd

if TYPE_CHECKING:
    from pyecharts.charts import Map

# Scraped product data and the HTML page the map is rendered to.
CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
HTML_PATH = '../file/HTML/全国商家省份分布图.html'


def count_by_province(provinces):
    """Count how many rows belong to each province.

    Args:
        provinces: iterable of province names (for example the ``省份``
            column of the CSV). Missing values are ignored.

    Returns:
        A list of ``(province, count)`` tuples, most frequent first.
    """
    counts = pd.Series(list(provinces), dtype=object).value_counts()
    return [(name, int(total)) for name, total in counts.items()]


def map_cn_disease_dis(list_data=None) -> 'Map':
    """Build the nationwide shop distribution map.

    Args:
        list_data: optional ``(province, count)`` pairs. When omitted, the
            pairs are computed from the ``省份`` column of ``CSV_PATH``.

    Returns:
        The configured pyecharts ``Map``; call ``render`` to write the page.
    """
    # Imported lazily so count_by_province stays usable without pyecharts.
    import pyecharts.options as opts
    from pyecharts.charts import Map

    if list_data is None:
        list_data = count_by_province(pd.read_csv(CSV_PATH)['省份'])
    # Materialise the pairs: a one-shot iterator such as zip() would be
    # exhausted if the chart were ever built a second time.
    list_data = list(list_data)

    return (
        Map()
        .add('中国', list_data, 'china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='全国商家店铺省份分布图'),
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                split_number=6,
                is_piecewise=True,  # discrete bands instead of a gradient
                pos_top='center',
                pieces=[
                    {'min': 1000, 'color': '#7f1818'},  # open-ended top band
                    {'min': 400, 'max': 999},
                    {'min': 200, 'max': 399},
                    {'min': 100, 'max': 199},
                    {'min': 10, 'max': 99},
                    # The lowest band used to end at 5, so provinces with
                    # 6-9 shops matched no band at all.
                    {'min': 0, 'max': 9},
                ],
            ),
        )
    )


if __name__ == '__main__':
    # Save the map as an HTML file.
    map_cn_disease_dis().render(HTML_PATH)
    print('文件保存完成')
结果显示

二 商品名称词云图
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build a word cloud from the words that appear in product titles."""
from collections import Counter

import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
STOPWORDS_PATH = '../file/TXT/StopWords.txt'
MASK_PATH = "../file/PNG/猫.PNG"
OUTPUT_PATH = '../file/PNG/01商品名称词云.PNG'


def load_stopwords(path):
    """Read one stop word per line from the UTF-8 file ``path`` into a set."""
    with open(path, 'r', encoding='utf-8') as handle:
        return {line.strip() for line in handle}


def count_words(titles, stopwords, cut=None):
    """Count the words used across all product titles.

    Args:
        titles: iterable of title strings; non-string entries (such as the
            NaN pandas uses for missing cells) are skipped.
        stopwords: collection of words to leave out of the count.
        cut: callable that splits one title into tokens. Defaults to
            ``jieba.lcut``.

    Returns:
        A ``collections.Counter`` mapping word -> number of occurrences.
        Whitespace-only tokens are never counted.
    """
    if cut is None:
        import jieba
        cut = jieba.lcut
    stopwords = set(stopwords)  # O(1) membership tests inside the loop
    counter = Counter()
    for title in titles:
        if not isinstance(title, str):
            continue
        counter.update(
            token for token in cut(title)
            if token.strip() and token not in stopwords
        )
    return counter


def main():
    """Read the CSV, then draw, save and display the word cloud."""
    # Imported here so the counting helpers work without a plotting stack.
    import imageio as im
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    data = pd.read_csv(CSV_PATH)
    counter = count_words(data['商品名'], load_stopwords(STOPWORDS_PATH))
    frequencies = dict(counter.most_common(100))

    plt.figure(figsize=(8, 8))
    # The picture is used as a mask that constrains the shape of the cloud.
    pic = im.imread(MASK_PATH)
    w_c = WordCloud(font_path="simhei.ttf", background_color="black",
                    mask=pic, max_font_size=100, margin=1)
    wc = w_c.fit_words(frequencies)

    plt.imshow(wc, interpolation='bilinear')
    plt.axis("off")  # hide the axis ticks
    # Write the image before opening the (blocking) preview window.
    wc.to_file(OUTPUT_PATH)
    plt.show()


if __name__ == '__main__':
    main()
结果展示

三 卖点频率关系图
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bar chart of the 30 most frequent words in product titles.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
from collections import Counter

import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
STOPWORDS_PATH = '../file/TXT/StopWords.txt'
PNG_PATH = '../file/PNG/02卖点频率关系图.png'


def load_stopwords(path):
    """Read one stop word per line from the UTF-8 file ``path`` into a set."""
    with open(path, 'r', encoding='utf-8') as handle:
        return {line.strip() for line in handle}


def top_words(titles, stopwords, limit=30, cut=None):
    """Return the ``limit`` most frequent words found in ``titles``.

    Args:
        titles: iterable of title strings; non-string entries (such as NaN
            for missing cells) are skipped.
        stopwords: collection of words to leave out.
        limit: how many words to return.
        cut: callable that splits one title into tokens. Defaults to
            ``jieba.lcut``.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first.
        Whitespace-only tokens are never counted.
    """
    if cut is None:
        import jieba
        cut = jieba.lcut
    stopwords = set(stopwords)  # O(1) membership tests inside the loop
    counter = Counter()
    for title in titles:
        if not isinstance(title, str):
            continue
        counter.update(
            token for token in cut(title)
            if token.strip() and token not in stopwords
        )
    return counter.most_common(limit)


def main():
    """Read the CSV, then draw, save and display the bar chart."""
    # Imported here so the counting helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    ranking = top_words(data['商品名'], load_stopwords(STOPWORDS_PATH))
    words = [word for word, _ in ranking]
    frequencies = [count for _, count in ranking]

    plt.figure(figsize=[10, 6])
    plt.bar(words, frequencies, width=0.3, color='green')
    plt.xlabel("卖点", fontproperties='SimHei', size=12)
    plt.ylabel("频率", fontproperties='SimHei', rotation=90, size=12)
    plt.title("卖点频率关系图", fontproperties='SimHei', size=16)
    # Tilt the labels so 30 words fit on the axis.
    plt.xticks(words, fontproperties='SimHei', rotation=-40, size=10)
    # Print each value just above its bar.
    for word, count in ranking:
        plt.text(word, count, count, ha='center', va='bottom', size=10)

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()
结果展示

四 品牌商品关系图
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bar chart of how many listed products belong to each phone brand.

__title__ = ''
__author__ = 'jia666666'
"""
import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/08品牌商品关系图.png'

# (chart label, lower-case keyword), listed in matching priority order.
BRAND_KEYWORDS = (
    ('华为', '华为'),
    ('小米', '小米'),
    ('三星', '三星'),
    ('苹果', '苹果'),
    ('VIVO', 'vivo'),
    ('OPPO', 'oppo'),
)
OTHER_LABEL = '其他'
NAMES = [label for label, _ in BRAND_KEYWORDS] + [OTHER_LABEL]


def brand_of(title):
    """Return the chart label of the first brand keyword found in ``title``.

    Matching ignores case, so "OPPO", "oppo" and "Oppo" are all recognised.
    Titles that are not strings (missing cells) or that mention no known
    brand are labelled ``OTHER_LABEL``.
    """
    if isinstance(title, str):
        lowered = title.lower()
        for label, keyword in BRAND_KEYWORDS:
            if keyword in lowered:
                return label
    return OTHER_LABEL


def count_brands(titles):
    """Return the number of titles per brand, in the order of ``NAMES``."""
    totals = dict.fromkeys(NAMES, 0)
    for title in titles:
        totals[brand_of(title)] += 1
    return [totals[name] for name in NAMES]


def main():
    """Read the CSV, then draw, save and display the bar chart."""
    # Imported here so the counting helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    names = list(NAMES)
    nums = count_brands(data['商品名'])

    plt.figure(figsize=[10, 6])
    plt.bar(names, nums, width=0.3, color='green')
    plt.xlabel("品牌", fontproperties='SimHei', size=12)
    plt.ylabel("商品数量", fontproperties='SimHei', rotation=90, size=12)
    plt.title("品牌商品关系图", fontproperties='SimHei', size=16)
    plt.xticks(names, fontproperties='SimHei', rotation=-40, size=10)
    # Print each value just above its bar.
    for name, num in zip(names, nums):
        plt.text(name, num, num, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()
结果展示

五 品牌市场占比
"""Pie chart of each phone brand's share of the listed products."""
import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/09品牌市场比重.png'

# (chart label, lower-case keyword), listed in matching priority order.
BRAND_KEYWORDS = (
    ('华为', '华为'),
    ('小米', '小米'),
    ('三星', '三星'),
    ('苹果', '苹果'),
    ('VIVO', 'vivo'),
    ('OPPO', 'oppo'),
)
OTHER_LABEL = '其他'
NAMES = [label for label, _ in BRAND_KEYWORDS] + [OTHER_LABEL]


def brand_of(title):
    """Return the chart label of the first brand keyword found in ``title``.

    Matching ignores case, so "OPPO", "oppo" and "Oppo" are all recognised.
    Titles that are not strings (missing cells) or that mention no known
    brand are labelled ``OTHER_LABEL``.
    """
    if isinstance(title, str):
        lowered = title.lower()
        for label, keyword in BRAND_KEYWORDS:
            if keyword in lowered:
                return label
    return OTHER_LABEL


def count_brands(titles):
    """Return the number of titles per brand, in the order of ``NAMES``."""
    totals = dict.fromkeys(NAMES, 0)
    for title in titles:
        totals[brand_of(title)] += 1
    return [totals[name] for name in NAMES]


def main():
    """Read the CSV, then draw, save and display the pie chart."""
    # Imported here so the counting helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    names = list(NAMES)
    nums = count_brands(data['商品名'])

    plt.pie(nums, labels=names, autopct='%.2f%%')
    plt.title("品牌市场比重", fontproperties='SimHei', size=16)
    plt.axis('equal')  # keep the pie circular
    plt.legend()

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()
结果展示

六 品牌销量关系图
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bar chart of the total units sold per phone brand.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/10品牌销量关系图.png'

# (chart label, lower-case keyword), listed in matching priority order.
BRAND_KEYWORDS = (
    ('华为', '华为'),
    ('小米', '小米'),
    ('三星', '三星'),
    ('苹果', '苹果'),
    ('VIVO', 'vivo'),
    ('OPPO', 'oppo'),
)
OTHER_LABEL = '其他'
NAMES = [label for label, _ in BRAND_KEYWORDS] + [OTHER_LABEL]


def brand_of(title):
    """Return the chart label of the first brand keyword found in ``title``.

    Matching ignores case. Titles that are not strings (missing cells) or
    that mention no known brand are labelled ``OTHER_LABEL``.
    """
    if isinstance(title, str):
        lowered = title.lower()
        for label, keyword in BRAND_KEYWORDS:
            if keyword in lowered:
                return label
    return OTHER_LABEL


def sales_by_brand(titles, sales):
    """Sum the units sold per brand.

    Args:
        titles: iterable of product titles.
        sales: iterable of units sold, aligned with ``titles``; missing
            values count as 0.

    Returns:
        A list of totals in the order of ``NAMES``.
    """
    totals = dict.fromkeys(NAMES, 0)
    for title, sold in zip(titles, sales):
        if pd.notna(sold):
            totals[brand_of(title)] += sold
    return [totals[name] for name in NAMES]


def main():
    """Read the CSV, then draw, save and display the bar chart."""
    # Imported here so the aggregation helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    names = list(NAMES)
    nums = sales_by_brand(data['商品名'], data['销量'])

    plt.figure(figsize=[10, 6])
    plt.bar(names, nums, width=0.3, color='green')
    plt.xlabel("品牌", fontproperties='SimHei', size=12)
    plt.ylabel("销量", fontproperties='SimHei', rotation=90, size=12)
    plt.title("品牌-销量关系图", fontproperties='SimHei', size=16)
    plt.xticks(names, fontproperties='SimHei', rotation=0, size=10)
    # Print each value just above its bar.
    for name, num in zip(names, nums):
        plt.text(name, num, num, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()
结果展示

七 品牌销售额对比
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bar chart of the total revenue (price x units sold) per phone brand.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/11品牌销售额关系图.png'

# (chart label, lower-case keyword), listed in matching priority order.
BRAND_KEYWORDS = (
    ('华为', '华为'),
    ('小米', '小米'),
    ('三星', '三星'),
    ('苹果', '苹果'),
    ('VIVO', 'vivo'),
    ('OPPO', 'oppo'),
)
OTHER_LABEL = '其他'
NAMES = [label for label, _ in BRAND_KEYWORDS] + [OTHER_LABEL]


def brand_of(title):
    """Return the chart label of the first brand keyword found in ``title``.

    Matching ignores case. Titles that are not strings (missing cells) or
    that mention no known brand are labelled ``OTHER_LABEL``.
    """
    if isinstance(title, str):
        lowered = title.lower()
        for label, keyword in BRAND_KEYWORDS:
            if keyword in lowered:
                return label
    return OTHER_LABEL


def whole_price(value):
    """Return ``value`` truncated to a whole number; missing values give 0.

    Going through ``float`` first lets numeric strings such as "1999.00"
    convert instead of raising ``ValueError``.
    """
    return 0 if pd.isna(value) else int(float(value))


def revenue_by_brand(titles, prices, sales):
    """Sum ``price * units sold`` per brand.

    Args:
        titles: iterable of product titles.
        prices: iterable of prices aligned with ``titles``; each price is
            truncated to a whole number, missing prices count as 0.
        sales: iterable of units sold aligned with ``titles``; missing
            values count as 0.

    Returns:
        A list of revenue totals in the order of ``NAMES``.
    """
    totals = dict.fromkeys(NAMES, 0)
    for title, price, sold in zip(titles, prices, sales):
        if pd.notna(sold):
            totals[brand_of(title)] += whole_price(price) * sold
    return [totals[name] for name in NAMES]


def main():
    """Read the CSV, then draw, save and display the bar chart."""
    # Imported here so the aggregation helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    names = list(NAMES)
    nums = revenue_by_brand(data['商品名'], data['价格'], data['销量'])

    plt.figure(figsize=[10, 6])
    plt.bar(names, nums, width=0.3, color='green')
    plt.xlabel("品牌", fontproperties='SimHei', size=12)
    plt.ylabel("销售额", fontproperties='SimHei', rotation=90, size=12)
    plt.title("品牌销售额关系图", fontproperties='SimHei', size=16)
    plt.xticks(names, fontproperties='SimHei', rotation=0, size=10)
    # Print each value just above its bar.
    for name, num in zip(names, nums):
        plt.text(name, num, num, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()
结果展示

八 价格销量
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bar chart of the total units sold within each price band.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
from bisect import bisect_right

import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/04价格销量关系图.png'

# Upper (exclusive) limits of every band except the open-ended last one.
PRICE_BOUNDS = (1000, 2000, 3000, 4000, 5000, 6000)
BAND_LABELS = ['0_1000', '1000_2000', '2000_3000', '3000_4000',
               '4000_5000', '5000_6000', '6000+']


def whole_number(value):
    """Return ``value`` truncated to an int; missing values give 0."""
    return 0 if pd.isna(value) else int(float(value))


def sales_by_price_band(prices, sales):
    """Sum the units sold per price band.

    Args:
        prices: iterable of prices; each is truncated to a whole number and
            missing prices count as 0 (so they land in the first band).
        sales: iterable of units sold aligned with ``prices``; missing
            values count as 0.

    Returns:
        A list of totals in the order of ``BAND_LABELS``.
    """
    totals = [0] * len(BAND_LABELS)
    for price, sold in zip(prices, sales):
        # bisect_right puts a price equal to a limit into the next band,
        # matching the "price < limit" comparisons of each band.
        band = bisect_right(PRICE_BOUNDS, whole_number(price))
        totals[band] += whole_number(sold)
    return totals


def main():
    """Read the CSV, then draw, save and display the bar chart."""
    # Imported here so the aggregation helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    labels = list(BAND_LABELS)
    totals = sales_by_price_band(data['价格'], data['销量'])

    plt.figure(figsize=[10, 6])
    plt.bar(labels, totals, width=0.3, color='green')
    plt.xlabel("价格", fontproperties='SimHei', size=12)
    plt.ylabel("销量", fontproperties='SimHei', rotation=90, size=12)
    plt.title("价格销量关系图", fontproperties='SimHei', size=16)
    plt.xticks(labels, fontproperties='SimHei', rotation=0, size=10)
    # Print each value just above its bar.
    for label, total in zip(labels, totals):
        plt.text(label, total, total, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()

九 价格销售额
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bar chart of the total revenue (price x units sold) per price band.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/24'
"""
from bisect import bisect_right

import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/05价格销售额关系图.png'

# Upper (exclusive) limits of every band except the open-ended last one.
PRICE_BOUNDS = (1000, 2000, 3000, 4000, 5000, 6000)
BAND_LABELS = ['0_1000', '1000_2000', '2000_3000', '3000_4000',
               '4000_5000', '5000_6000', '6000+']


def whole_price(value):
    """Return ``value`` truncated to an int; missing values give 0."""
    return 0 if pd.isna(value) else int(float(value))


def revenue_by_price_band(prices, sales):
    """Sum ``price * units sold`` per price band.

    Args:
        prices: iterable of prices; each is truncated to a whole number and
            missing prices count as 0 (so they land in the first band).
        sales: iterable of units sold aligned with ``prices``; missing
            values count as 0.

    Returns:
        A list of revenue totals in the order of ``BAND_LABELS``.
    """
    totals = [0] * len(BAND_LABELS)
    for price, sold in zip(prices, sales):
        price = whole_price(price)
        if pd.isna(sold):
            sold = 0
        # bisect_right puts a price equal to a limit into the next band,
        # matching the "price < limit" comparisons of each band.
        totals[bisect_right(PRICE_BOUNDS, price)] += price * sold
    return totals


def main():
    """Read the CSV, then draw, save and display the bar chart."""
    # Imported here so the aggregation helpers work without matplotlib.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    data = pd.read_csv(CSV_PATH)
    labels = list(BAND_LABELS)
    totals = revenue_by_price_band(data['价格'], data['销量'])

    plt.figure(figsize=[10, 6])
    plt.bar(labels, totals, width=0.3, color='green')
    plt.xlabel("价格", fontproperties='SimHei', size=12)
    plt.ylabel("销售额", fontproperties='SimHei', rotation=90, size=12)
    plt.title("价格销售额关系图", fontproperties='SimHei', size=16)
    plt.xticks(labels, fontproperties='SimHei', rotation=0, size=10)
    # Print each value just above its bar.
    for label, total in zip(labels, totals):
        plt.text(label, total, total, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()

十 价格影响图
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Four-panel figure relating price bands to listings, sales and revenue.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/25'
"""
from bisect import bisect_right

import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/06价格影响关系图.png'

# Upper (exclusive) limits of every band except the open-ended last one.
PRICE_BOUNDS = (1000, 2000, 3000, 4000, 5000, 6000)
BAND_LABELS = ['0_1000', '1000_2000', '2000_3000', '3000_4000',
               '4000_5000', '5000_6000', '6000+']


def whole_price(value):
    """Return ``value`` truncated to an int; missing values give 0."""
    return 0 if pd.isna(value) else int(float(value))


def summarise_by_price_band(prices, sales):
    """Aggregate listings, units sold and revenue per price band.

    Args:
        prices: iterable of prices; each is truncated to a whole number and
            missing prices count as 0 (so they land in the first band).
        sales: iterable of units sold aligned with ``prices``; missing
            values count as 0.

    Returns:
        ``(count, sale, money)``: three lists in the order of
        ``BAND_LABELS`` holding the number of listings, the units sold and
        the revenue (price x units sold) of each band.
    """
    count = [0] * len(BAND_LABELS)
    sale = [0] * len(BAND_LABELS)
    money = [0] * len(BAND_LABELS)
    for price, sold in zip(prices, sales):
        price = whole_price(price)
        if pd.isna(sold):
            sold = 0
        # bisect_right puts a price equal to a limit into the next band,
        # matching the "price < limit" comparisons of each band.
        band = bisect_right(PRICE_BOUNDS, price)
        count[band] += 1
        sale[band] += sold
        money[band] += price * sold
    return count, sale, money


def draw_bar_panel(plt, position, labels, values, color, ylabel, title):
    """Draw one labelled bar chart into subplot ``position`` of the figure."""
    plt.subplot(position)
    plt.bar(labels, values, width=0.3, color=color)
    plt.xlabel("价格", fontproperties='SimHei', size=12)
    plt.ylabel(ylabel, fontproperties='SimHei', rotation=90, size=12)
    plt.title(title, fontproperties='SimHei', size=16)
    plt.xticks(labels, fontproperties='SimHei', rotation=0, size=10)
    # Print each value just above its bar.
    for label, value in zip(labels, values):
        plt.text(label, value, value, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')


def main():
    """Read the CSV, then draw, save and display the 2x2 figure."""
    # Imported here so the aggregation helpers work without matplotlib.
    import matplotlib.pyplot as plt

    data = pd.read_csv(CSV_PATH)
    index = list(BAND_LABELS)
    count, sale, money = summarise_by_price_band(data['价格'], data['销量'])

    plt.figure(figsize=[18, 18])
    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    # Top left: number of listings per price band.
    draw_bar_panel(plt, 221, index, count, 'green', "商品数量", "价格商品数量关系图")

    # Top right: share of listings per price band.
    plt.subplot(222)
    plt.pie(count, labels=index, autopct='%.2f%%')
    plt.title("价格商品总量占比", fontproperties='SimHei', size=16)
    plt.axis('equal')  # keep the pie circular
    plt.legend()

    # Bottom row: units sold and revenue per price band.
    draw_bar_panel(plt, 223, index, sale, 'black', "销量", "价格销量关系图")
    draw_bar_panel(plt, 224, index, money, 'red', "销售额", "价格销售额关系图")

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()

十一 品牌影响关系图
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Four-panel figure relating phone brands to listings, sales and revenue.

__title__ = ''
__author__ = 'jia666666'
__time__ = '2020/3/25'
"""
import pandas as pd

CSV_PATH = '../file/CSV/智能手机' + '-all.csv'
PNG_PATH = '../file/PNG/12品牌影响关系图.png'

# (chart label, lower-case keyword), listed in matching priority order.
BRAND_KEYWORDS = (
    ('华为', '华为'),
    ('小米', '小米'),
    ('三星', '三星'),
    ('苹果', '苹果'),
    ('VIVO', 'vivo'),
    ('OPPO', 'oppo'),
)
OTHER_LABEL = '其他'
NAMES = [label for label, _ in BRAND_KEYWORDS] + [OTHER_LABEL]


def brand_of(title):
    """Return the chart label of the first brand keyword found in ``title``.

    Matching ignores case. Titles that are not strings (missing cells) or
    that mention no known brand are labelled ``OTHER_LABEL``.
    """
    if isinstance(title, str):
        lowered = title.lower()
        for label, keyword in BRAND_KEYWORDS:
            if keyword in lowered:
                return label
    return OTHER_LABEL


def whole_price(value):
    """Return ``value`` truncated to an int; missing values give 0."""
    return 0 if pd.isna(value) else int(float(value))


def summarise_by_brand(titles, prices, sales):
    """Aggregate listings, units sold and revenue per brand.

    Args:
        titles: iterable of product titles.
        prices: iterable of prices aligned with ``titles``; each price is
            truncated to a whole number, missing prices count as 0.
        sales: iterable of units sold aligned with ``titles``; missing
            values count as 0.

    Returns:
        ``(count, sale, money)``: three lists in the order of ``NAMES``
        holding the number of listings, the units sold and the revenue
        (price x units sold) of each brand.
    """
    slot = {name: position for position, name in enumerate(NAMES)}
    count = [0] * len(NAMES)
    sale = [0] * len(NAMES)
    money = [0] * len(NAMES)
    for title, price, sold in zip(titles, prices, sales):
        if pd.isna(sold):
            sold = 0
        position = slot[brand_of(title)]
        count[position] += 1
        sale[position] += sold
        money[position] += whole_price(price) * sold
    return count, sale, money


def draw_bar_panel(plt, position, labels, values, color, ylabel, title):
    """Draw one labelled bar chart into subplot ``position`` of the figure."""
    plt.subplot(position)
    plt.bar(labels, values, width=0.3, color=color)
    plt.xlabel("品牌", fontproperties='SimHei', size=12)
    plt.ylabel(ylabel, fontproperties='SimHei', rotation=90, size=12)
    plt.title(title, fontproperties='SimHei', size=16)
    plt.xticks(labels, fontproperties='SimHei', rotation=0, size=10)
    # Print each value just above its bar.
    for label, value in zip(labels, values):
        plt.text(label, value, value, ha='center', va='bottom', size=10)
    plt.grid(linestyle='-.')


def main():
    """Read the CSV, then draw, save and display the 2x2 figure."""
    # Imported here so the aggregation helpers work without matplotlib.
    import matplotlib.pyplot as plt

    data = pd.read_csv(CSV_PATH)
    index = list(NAMES)
    count, sale, money = summarise_by_brand(
        data['商品名'], data['价格'], data['销量'])

    plt.figure(figsize=[20, 18])
    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    # Top left: number of listings per brand.
    draw_bar_panel(plt, 221, index, count, 'green', "商品数量", "品牌商品数量关系图")

    # Top right: share of listings per brand.
    plt.subplot(222)
    plt.pie(count, labels=index, autopct='%.2f%%')
    plt.title("品牌商品总量占比", fontproperties='SimHei', size=16)
    plt.axis('equal')  # keep the pie circular
    plt.legend()

    # Bottom row: units sold and revenue per brand.
    draw_bar_panel(plt, 223, index, sale, 'black', "销量", "品牌销量关系图")
    draw_bar_panel(plt, 224, index, money, 'red', "销售额", "品牌销售额关系图")

    plt.savefig(PNG_PATH)
    plt.show()


if __name__ == '__main__':
    main()

python爬虫天猫商品数据及分析(2)相关推荐

  1. python爬虫天猫商品数据及分析(5)

    目的 对获取的天猫商品-智能手机评价 进行数据分析 实现 一 评价词云 #!/usr/bin/env python # -*- coding: utf-8 -*-import time import ...

  2. python爬虫天猫商品数据及分析(3)

    目的 获取商品关键词-智能手机的有关评价信息 评价信息(网络类型,机身颜色,套餐类型,存储容量,版本类型,评价内容,评价时间) 为后面的数据分析提供数据源 源码 #!/usr/bin/env pyth ...

  3. python爬虫天猫商品数据及分析(4)

    目的 数据导入数据库 数据颜色清洗 机身颜色分析 套餐类型分析 实现 一 商品评价信息导入mysql #!/usr/bin/env python # -*- coding: utf-8 -*-impo ...

  4. 爬虫项目八:Python对天猫商品数据、评论数据爬取

    文章目录 前言 一.商品数据 1.分析url 2.登录账号 3.解析数据 4.模拟滑动滑块 二.评论数据 1.分析url 2.解析数据 前言 天猫商城商品数据.评论数据爬取 提示:以下是本篇文章正文内 ...

  5. Python爬虫大作业+数据可视化分析(抓取python职位)

    目录 一.抓取并解析数据 1.导入相关库 2.获取网页信息 3.数据清洗 4.爬取结果: ??二.保存数据 1.保存到excel中 2.保存到数据库中 ? ?3.调用 三.使用flask,实现可视化 ...

  6. 通过爬取天猫商品评论实例分析Python爬取ajax动态生成的数据

    本文主要通过爬取天猫商品kindle的评论为例来说明利用python爬取ajax动态生成的数据的方式,本文使用的工具如下: 工具 chrome浏览器[寻找评论的动态链接] python3.5[执行代码 ...

  7. Python爬虫实战:天猫商品数据爬虫使用教程

    本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理. 下载chrome浏览器 查看chrome浏览器的版本号,下载对应版本号的chro ...

  8. Python爬虫学习教程:天猫商品数据爬虫

    天猫商品数据爬虫使用教程 下载chrome浏览器 查看chrome浏览器的版本号,下载对应版本号的chromedriver驱动 pip安装下列包 pip install selenium pip in ...

  9. 用python爬取天猫商品评论并分析(2)

    用python爬取天猫商品评论并分析(2) 之前介绍过天猫数据的爬取和数据初步处理,今天介绍下 将采集的评论进行文本分析!下面是总流程: 0. 主要流程 0. 数据采集 这一步参考网址:https:/ ...

最新文章

  1. [转载]《博客园精华集》Winform筛选结果(共105篇)
  2. 查看控制文件的内容(oracle)
  3. Re:从 0 开始的微服务架构--(三)微服务架构 API 的开发与治理--转
  4. html文本可选择,如何用javascript选择html文本?
  5. form select multiple 某个字段是数组_Hive取非Group by字段数据的方法
  6. 7-4 统计一行文本的单词个数 (15 分)
  7. 155.最小栈(力扣leetcode) 博主可答疑该问题
  8. 仓库管理系统java和mysql_基于Android的仓库管理系统APP设计与实现毕业论文+前后台(Java+Mysql)源码及数据库文件+前后台运行演示视频...
  9. 2020 年 Python 知识清单(网络爬虫)
  10. 2019 acm-icpc 西安全国邀请赛 J
  11. UE4-目录结构简介
  12. python re 查找字符串中是否含有汉字
  13. 国信长天蓝桥杯嵌入式类——stm32——使用keil4建立工程文件过程
  14. 紫薇星上的数据结构(1)
  15. 对CreateCompatibleDC的粗浅认识
  16. Android View详解(三) 视图状态及重绘流程分析
  17. 修改管理员信息php,修改管理员_ThinkPHP_大笨熊_IT技术平台
  18. .NET 7 预览版 1 发布
  19. PHPBB3的用户密码
  20. oracle补丁集2020,Oracle发布2020年首批关键补丁更新,多达334个安全补丁

热门文章

  1. 最优控制理论 五+、极大值原理Bang-Bang控制问题的求解
  2. Kubernetes Pod 网络精髓:pause 容器详解
  3. 计算机应用基础 周凌,计算机基础毕业论文范文
  4. 能将用户切片链接到其他html页面或位置,网页美工设计..doc
  5. N+1道Vue面试题,快来康康
  6. 2719 sheldon数
  7. (使用工具)Matlab转C++
  8. 我个人整理的AD/2000技巧,各位收藏吧!60多个。 转贴
  9. Internal error 2356.CABS.W1.cab
  10. JS函数curry(柯里化)