007.python科学计算库matplotlib(下)

测试数据 fandango_scores.csv

bar

import matplotlib.pyplot as plt
import pandas as pd
from numpy import arange

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

# ax.bar() needs two things: where each bar sits on the x axis and how tall
# it is.  Here the positions are the LEFT edges of the bars and the heights
# are the rating values of one film.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# .loc[0, num_cols] picks the num_cols values of the row labelled 0 (the
# first row, since read_csv was called without index_col).  DataFrame.ix,
# used by the original snippet, has been removed from pandas.
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)  # sample output from the original post: [4.3 3.55 3.9 4.5 5.0]

# Left edge of each of the five bars.
bar_positions = arange(5) + 0.75
print(bar_positions)  # [0.75 1.75 2.75 3.75 4.75]

fig, ax = plt.subplots()
# 0.5 is the bar width.  align='edge' makes bar_positions the left edges;
# the current matplotlib default ('center') would treat them as centres.
ax.bar(bar_positions, bar_heights, 0.5, align='edge')
plt.show()

import matplotlib.pyplot as plt
import pandas as pd
from numpy import arange

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

# By default matplotlib chooses its own numeric tick locations on the x axis.
# We only want one tick under each bar, so ax.set_xticks() is used to place
# the ticks at [1, 2, 3, 4, 5].
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# Ratings of the row labelled 0; .loc replaces the removed DataFrame.ix.
bar_heights = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

# Create a figure with a single set of axes.
fig, ax = plt.subplots()
# Left edges at 0.75, 1.75, ... with width 0.5 centre the bars on 1..5,
# i.e. exactly on tick_positions.  align='edge' is required for that because
# the current matplotlib default is align='center'.
ax.bar(bar_positions, bar_heights, 0.5, align='edge')
ax.set_xticks(tick_positions)
# Label each tick with the name of the rating column it represents.
ax.set_xticklabels(num_cols, rotation=45)

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

barh

import matplotlib.pyplot as plt
import pandas as pd
from numpy import arange

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

# Same data as the vertical bar chart, drawn horizontally: the bars are
# stacked along the y axis, so the ticks and tick labels are set on y
# (at [1, 2, 3, 4, 5]) instead of x.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# Ratings of the row labelled 0; .loc replaces the removed DataFrame.ix.
# For barh these values are the bar lengths along x.
bar_heights = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1, 6)

# Create a figure with a single set of axes.
fig, ax = plt.subplots()
# barh draws a horizontal bar chart; 0.5 is the thickness of each bar.
# align='edge' makes bar_positions the lower edges, so each bar is centred
# on one of tick_positions (the current default is align='center').
ax.barh(bar_positions, bar_heights, 0.5, align='edge')
ax.set_yticks(tick_positions)
# Label each y tick with the name of the rating column it represents.
ax.set_yticklabels(num_cols)

ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

scatter

import matplotlib.pyplot as plt
import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

# A scatter plot shows many (x, y) points at once: here one point per row,
# Fandango rating on x against Rotten Tomatoes user rating on y.
fig, ax = plt.subplots()
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()

import matplotlib.pyplot as plt
import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

fig = plt.figure(figsize=(5, 10))
# Two stacked subplots (2 rows, 1 column), both scatter plots.
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

# Top panel: Fandango on x, Rotten Tomatoes on y.
ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')

# Bottom panel: the same data with the axes swapped.
ax2.scatter(norm_reviews['RT_user_norm'], norm_reviews['Fandango_Ratingvalue'])
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')
plt.show()

hist

import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

# value_counts() returns how many times each distinct value occurs, ordered
# from most to least frequent; NA values are excluded by default.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
# sort_index() reorders by label, i.e. by the Fandango_Ratingvalue itself.
fandango_distribution = fandango_distribution.sort_index()

imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
# sort_index() reorders by label, i.e. by the IMDB_norm value itself.
imdb_distribution = imdb_distribution.sort_index()

print(fandango_distribution)
print("-------------------------------")
print(imdb_distribution)

import matplotlib.pyplot as plt
import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

fig, ax = plt.subplots()
# Histogram of RT_user_norm: the interval given by `range` (4 to 5) is split
# into `bins` equal-width bins and values outside the interval are ignored.
# Without `range` the full span of the data is used; without `bins`
# matplotlib's default of 10 bins applies.
ax.hist(norm_reviews['RT_user_norm'], range=(4, 5), bins=20)
plt.show()

import matplotlib.pyplot as plt
import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

fig = plt.figure(figsize=(5, 10))
# Two stacked subplots (2 rows, 1 column), one histogram each.
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
# Fix the y-axis limits so both panels share the same scale.
ax1.set_ylim(0, 50)

# bins is passed by keyword here (the original passed 20 positionally) so
# both hist() calls read the same way.
ax2.hist(norm_reviews['RT_user_norm'], bins=20, range=(0, 5))
ax2.set_title('Distribution of Rotten Tomatoes Ratings')
ax2.set_ylim(0, 50)

plt.show()

boxplot

import matplotlib.pyplot as plt
import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

fig, ax = plt.subplots()
# Draw a box-and-whisker plot.
# The box spans the lower to the upper quartile of the data, with a line at
# the median.  The whiskers extend from the box to show the range of the
# data, and flier points are the values lying beyond the ends of the whiskers.
ax.boxplot(norm_reviews['RT_user_norm'])
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()

import matplotlib.pyplot as plt
import pandas as pd

# Load the review data and keep only the columns used in this example.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]

# The four rating columns to compare side by side.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']

fig, ax = plt.subplots()
# Passing a 2-D array makes boxplot draw one box per column.
ax.boxplot(norm_reviews[num_cols].values)
# Label each box with its column name, rotated so the names do not overlap.
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0, 5)
plt.show()

007.python科学计算库matplotlib(下)相关推荐

005.python科学计算库pandas(下)
测试数据 fandango_score_comparison.csv series import pandas as pd from pandas import Seriesfandango = pd ...
Python科学计算库核心知识点总结_代码篇(ML/DL依赖语法)
Python科学计算库核心知识点总结_代码篇(ML/DL依赖语法) ...
初识 Python 科学计算库之 NumPy（创建多维数组对象）
文章目录参考描述 NumPy 特点获取导入多维数组对象 np.array() np.asarray() 范围随机概览 np.random.randn() np.random.normal ...
一文带你熟悉简单实用的Python科学计算库NumPy
Python科学计算库NumPy 安装数组的创建 array创建 **arange** 创建 **随机数创建** 方法numpy.random.random(size=None) 方法numpy.r ...
Python 科学计算库 Numpy 准备放弃 Python 2 了
Numpy 是 Python 的一个科学计算库，提供了矩阵运算的功能，一般与 Scipy、matplotlib 一起使用。今天 Numpy 的 GitHub 主页上发文称，Numpy 库准备从 20 ...
python科学计算库安装
python科学计算相关的库包括numpy,scipy,matplotlib等,但是自己安装比较不容易,倒不是安装过程有多难,而是会出现各种各样的问题,现在做一记录安装顺序numpy -> s ...
python科学计算库-数值计算库与科学计算库
BLAS 接口 BLAS , LAPACK , ATLAS 这些数值计算库的名字很类似,他们之间有什么关系呢?BLAS是一组线性代数运算接口,目前是事实上的标准,很多数值计算/科学计算都实现了这套接口 ...
python科学计算库numpy和绘图库PIL的结合,素描图片(原创)
# 导入绘图库 from PIL import Image #导入科学计算库 import numpy as np #封装一个图像处理工具类 class TestNumpy(object):def p ...
Python | 科学计算库
一.Numpy 1.ndarray对象 python提供了array模块,它可以直接保存数值(而不是对象),但是它不支持多维数组,也缺乏丰富的运算函数 ndarray即n维数组,它弥补了以上不足,提供 ...

007.python科学计算库matplotlib(下)

测试数据 fandango_scores.csv

bar

barh

scatter

hist

boxplot

007.python科学计算库matplotlib(下)相关推荐

最新文章

热门文章