pandas training: how much sugar do we eat?
First check: how much sugar a number of countries take in
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the data and convert every value in the ``countries`` column to
# lower case with the .str.lower() method.
world_food_facts = pd.read_csv('FoodFacts.csv')
world_food_facts['countries'] = world_food_facts['countries'].str.lower()
# Take the average:
def mean(l):
    """Return the arithmetic mean of the numbers in *l* as a float.

    An empty input yields ``float('nan')`` instead of raising
    ``ZeroDivisionError``, so a country without any matching rows does not
    abort the whole script.
    """
    # len() rather than truthiness: array-like inputs do not define bool().
    if len(l) == 0:
        return float('nan')
    return float(sum(l)) / len(l)


# Drop the rows with missing values:
world_sugars = world_food_facts.loc[world_food_facts['sugars_100g'].notna()]
# print(world_sugars)
# Only specific countries are counted. The values are converted to a list
# because some countries appear under several names; lists are easy to
# concatenate, which makes the final averaging convenient.
def return_sugars(country):
    """Return the ``sugars_100g`` values for one country name as a list.

    Rows are taken from the module-level ``world_sugars`` frame; a row is
    selected when its ``countries`` value equals *country* exactly.
    """
    is_country = world_sugars.countries == country
    matching_rows = world_sugars[is_country]
    return matching_rows.sugars_100g.tolist()


# Get list of sugars per 100g for some countries
# A country can appear under several spellings, so the lists returned for
# each spelling are concatenated before averaging.
fr_sugars = return_sugars('france') + return_sugars('en:fr')
za_sugars = return_sugars('south africa')
uk_sugars = return_sugars('united kingdom') + return_sugars('en:gb')
us_sugars = return_sugars('united states') + return_sugars('en:us') + return_sugars('us')
sp_sugars = return_sugars('spain') + return_sugars('españa') + return_sugars('en:es')
nd_sugars = return_sugars('netherlands') + return_sugars('holland')
au_sugars = return_sugars('australia') + return_sugars('en:au')
# Canada's country code is "ca"; the former 'en:cn' is China's code and
# mixed Chinese products into the Canadian average. 'en:ca' follows the
# 'en:<code>' pattern used for the other countries above.
cn_sugars = return_sugars('canada') + return_sugars('en:ca')
de_sugars = return_sugars('germany')

# Tick labels, in the same order as the averages in ``sugars_l``.
countries = ['FR', 'ZA', 'UK', 'US', 'ES', 'ND', 'AU', 'CA', 'DE']

# Take the mean for each country.
sugars_l = [
    mean(fr_sugars),
    mean(za_sugars),
    mean(uk_sugars),
    mean(us_sugars),
    mean(sp_sugars),
    mean(nd_sugars),
    mean(au_sugars),
    mean(cn_sugars),
    mean(de_sugars),
]

# Visualize: one bar per country.
y_pos = np.arange(len(countries))
plt.bar(y_pos, sugars_l, align='center', alpha=0.5)
plt.title('Average total sugar content per 100g')
plt.xticks(y_pos, countries)
plt.ylabel('Sugar/100g')
plt.show()
Second check: how much salt do we eat?
P.S. 最后将结果转为 Series,并用 sort_values() 对 values 进行排序
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the data and lower-case the country names so that the exact-match
# lookups in return_sodium() work with lower-case spellings.
world_food_facts = pd.read_csv('FoodFacts.csv')
world_food_facts.countries = world_food_facts.countries.str.lower()


def mean(l):
    """Return the arithmetic mean of the numbers in *l* as a float.

    An empty input yields ``float('nan')`` instead of raising
    ``ZeroDivisionError``, so a country without any matching rows does not
    abort the whole script.
    """
    # len() rather than truthiness: array-like inputs do not define bool().
    if len(l) == 0:
        return float('nan')
    return float(sum(l)) / len(l)


# Keep only the rows that have a sodium value.
world_sodium = world_food_facts[world_food_facts.sodium_100g.notnull()]


def return_sodium(country):
    """Return the ``sodium_100g`` values for one country name as a list.

    Rows are taken from the module-level ``world_sodium`` frame; a row is
    selected when its ``countries`` value equals *country* exactly.
    """
    return world_sodium[world_sodium.countries == country].sodium_100g.tolist()


# Get list of sodium per 100g for some countries
fr_sodium = return_sodium('france') + return_sodium('en:fr')
za_sodium = return_sodium('south africa')
uk_sodium = return_sodium('united kingdom') + return_sodium('en:gb')
us_sodium = return_sodium('united states') + return_sodium('en:us') + return_sodium('us')
sp_sodium = return_sodium('spain') + return_sodium('españa') + return_sodium('en:es')
ch_sodium = return_sodium('china')
nd_sodium = return_sodium('netherlands') + return_sodium('holland')
au_sodium = return_sodium('australia') + return_sodium('en:au')
jp_sodium = return_sodium('japan') + return_sodium('en:jp')
de_sodium = return_sodium('germany')

# Tick labels, in the same order as the averages in ``sodium_l``.
# NOTE(review): 'CH' labels the China bar here; as an ISO code 'CH' is
# Switzerland. Labels are kept as in the original.
countries = ['FR', 'ZA', 'UK', 'USA', 'ES', 'CH', 'ND', 'AU', 'JP', 'DE']
sodium_l = [
    mean(fr_sodium),
    mean(za_sodium),
    mean(uk_sodium),
    mean(us_sodium),
    mean(sp_sodium),
    mean(ch_sodium),
    mean(nd_sodium),
    mean(au_sodium),
    mean(jp_sodium),
    mean(de_sodium),
]

y_pos = np.arange(len(countries))

# Print the averages as a Series sorted by value.
s1 = pd.Series(sodium_l, index=countries)
print(s1.sort_values())

plt.bar(y_pos, sodium_l, align='center', alpha=0.5)
plt.title('Average sodium content per 100g')
plt.xticks(y_pos, countries)
plt.ylabel('Sodium/100g')
plt.show()
Third check: how many additives are in our food?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the data and lower-case the country names so that the exact-match
# lookups in return_additives() work with lower-case spellings.
world_food_facts = pd.read_csv('FoodFacts.csv')
world_food_facts.countries = world_food_facts.countries.str.lower()


def mean(l):
    """Return the arithmetic mean of the numbers in *l* as a float.

    An empty input yields ``float('nan')`` instead of raising
    ``ZeroDivisionError``, so a country without any matching rows does not
    abort the whole script.
    """
    # len() rather than truthiness: array-like inputs do not define bool().
    if len(l) == 0:
        return float('nan')
    return float(sum(l)) / len(l)


# Keep only the rows that have an additives count.
world_additives = world_food_facts[world_food_facts.additives_n.notnull()]


def return_additives(country):
    """Return the ``additives_n`` values for one country name as a list.

    Rows are taken from the module-level ``world_additives`` frame; a row is
    selected when its ``countries`` value equals *country* exactly.
    """
    return world_additives[world_additives.countries == country].additives_n.tolist()


# Get list of additives amounts for some countries
fr_additives = return_additives('france') + return_additives('en:fr')
za_additives = return_additives('south africa')
uk_additives = return_additives('united kingdom') + return_additives('en:gb')
us_additives = return_additives('united states') + return_additives('en:us') + return_additives('us')
sp_additives = return_additives('spain') + return_additives('españa') + return_additives('en:es')
ch_additives = return_additives('china')
nd_additives = return_additives('netherlands') + return_additives('holland')
au_additives = return_additives('australia') + return_additives('en:au')
jp_additives = return_additives('japan') + return_additives('en:jp')
de_additives = return_additives('germany')

# Tick labels, in the same order as the averages in ``additives_l``.
# NOTE(review): 'CH' labels the China bar here; as an ISO code 'CH' is
# Switzerland. Labels are kept as in the original.
countries = ['FR', 'ZA', 'UK', 'US', 'ES', 'CH', 'ND', 'AU', 'JP', 'DE']
additives_l = [
    mean(fr_additives),
    mean(za_additives),
    mean(uk_additives),
    mean(us_additives),
    mean(sp_additives),
    mean(ch_additives),
    mean(nd_additives),
    mean(au_additives),
    mean(jp_additives),
    mean(de_additives),
]

y_pos = np.arange(len(countries))

# Plot the additives averages. The original passed ``sodium_l`` here, which
# is undefined in this script (or stale data from the sodium analysis).
plt.bar(y_pos, additives_l, align='center', alpha=0.5)
plt.title('Average amount of additives')
plt.xticks(y_pos, countries)
plt.ylabel('Amount of additives')
plt.show()
Finally, use the groupby functionality in pandas to simplify the analysis
import zipfile
import os
import pandas as pd
import matplotlib.pyplot as plt


def unzip(zip_filepath, dest_path):
    """Extract a zip file.

    Every member of the archive at *zip_filepath* is extracted into the
    directory *dest_path*.
    """
    with zipfile.ZipFile(zip_filepath) as zf:
        zf.extractall(path=dest_path)
# Get the name of the data file
def get_dataset_filename(zip_filepath):
    """Return the name of the first member listed in the zip archive.

    Raises ``IndexError`` if the archive has no members.
    """
    with zipfile.ZipFile(zip_filepath) as zf:
        return zf.namelist()[0]


def run_main():
    """Main function.

    Extracts the data set from the zip archive, computes the mean number of
    additives per country, prints it, plots the ten highest averages, writes
    the sorted result to ``./country_additives.csv`` and finally removes the
    extracted data file.
    """
    # Declare variables
    dataset_path = './data'  # data set directory
    zip_filename = 'open-food-facts.zip'  # zip file name
    zip_filepath = os.path.join(dataset_path, zip_filename)  # zip file path
    dataset_filename = get_dataset_filename(zip_filepath)  # data file name (inside the zip)
    dataset_filepath = os.path.join(dataset_path, dataset_filename)  # data file path

    print('解压zip...', end='')
    unzip(zip_filepath, dataset_path)
    print('完成.')

    try:
        # Read the data. The grouping below uses 'countries_en', so that is
        # the column to load; loading 'countries' instead made every
        # data['countries_en'] access fail with KeyError.
        data = pd.read_csv(dataset_filepath, usecols=['countries_en', 'additives_n'])

        # Analyze the number of food additives per country.
        # 1. Data cleaning: drop rows with missing values and normalize case.
        data.dropna(inplace=True)
        data['countries_en'] = data['countries_en'].str.lower()

        # 2. Group by country and take the mean.
        country_additives = data['additives_n'].groupby(data['countries_en']).mean()
        print(country_additives)

        # 3. Sort by value, largest first (countries with a zero mean are dropped).
        result = country_additives[country_additives > 0].sort_values(ascending=False)

        # 4. Visualize the top 10 with pandas.
        result.iloc[:10].plot.bar()
        plt.show()

        # 5. Save the processed result.
        result.to_csv('./country_additives.csv')
    finally:
        # Delete the extracted data to free space, even if the analysis failed.
        if os.path.exists(dataset_filepath):
            os.remove(dataset_filepath)
pandas--traning-how much sugar do we eat相关推荐
- 一文了解异步编程基础
什么是异步编程? 异步编程是指并发编程的范式,其中除了单个主应用程序线程之外,工作可以委托给一个或多个并行工作线程.这被称为非阻塞系统,其中整体系统速度不受订单执行的影响,并且多个进程可以同时发生. ...
- 机器学习基础 --- pandas的基本使用
一.pandas的简介 Python Data Analysis Library 或 pandas 是基于NumPy 的一种工具,该工具是为了解决数据分析任务而创建的.Pandas 纳入了大量库和一些 ...
- 九、Pandas高级处理
4.6高级处理-缺失值处理 点击标题即可获取文章源代码和笔记 数据集:https://download.csdn.net/download/weixin_44827418/12548095 Panda ...
- 理想国pandas练习题3
需求 存在test.json文件(文末),请完成以下需求 题1:读取json文件,并存储为csv文件 题1 # 题1:读取json文件,并存储为csv文件 import pandas as pddf ...
- 理想国pandas练习题4
需求 存在test.json文件(文末),请完成以下需求 题1:读取json文件,并存储为csv文件 题1 # 题1:读取json文件,并存储为csv文件 import pandas as pddf ...
- pandas用法小结
前言 个人感觉网上对pandas的总结感觉不够详尽细致,在这里我对pandas做个相对细致的小结吧,在数据分析与人工智能方面会有所涉及到的东西在这里都说说吧,也是对自己学习的一种小结! pandas用 ...
- 数据科学库笔记(四)pandas
文章目录 (一)什么是pandas (二) pandas安装 2.1 直接命令安装 2.2 下载pipy文件安装 (三)pandas的常用数据类型 3.1 pandas之Series创建 3.2 pa ...
- lungs UFA eat walnuts.
1, shrimp + D C = poisoning 2, cold medicine + Coke = poisoning 3, egg avoid saccharin ┄ ┄ with food ...
- Pandas 秘籍:1~5
原文:Pandas Cookbook 协议:CC BY-NC-SA 4.0 译者:飞龙 一.Pandas 基础 在本章中,我们将介绍以下内容: 剖析数据帧的结构 访问主要的数据帧组件 了解数据类型 选 ...
最新文章
- 怎么逐步突破,成为Python高手?
- 【工具】公网临时大文件传输工具
- 数据库视频总结三(游标和事务)
- boost::uuids::random_generator相关的测试程序
- idea 关闭检查更新_Intellij idea的抑制警告(SuppressWarnings)列表(正在持续更新)
- Nginx 性能调优
- rust投递箱连接箱子_海门市围板箱定制围板箱内衬
- 让人头大的Tablet PC
- CAN总线和RS485的比较:
- 圆周率π的近似计算(三)-MapReduce分布式计算入门
- 关于数字化营销,这些知识点你一定要知道
- 没有这个传奇工程师,就没有今天的 Windows
- 两个正态总体方差比的置信区间
- android集成环信客服云_2019.6.12
- Android开发自定义View之仿米家APP双色灯控制UI:做一个智能家居产品的简单智能灯UI !(附带Demo)
- Android使用高德地图api实现基础定位
- 机器之心最干的文章:机器学习中的矩阵、向量求导
- 搜索引擎免费登陆入口
- 微生物组-宏基因组分析专题技术研讨会(2023.3)
- 人脸检测:Viola-Jones