
import pandas as pd
# One Series per country; each Series becomes one row of the frame below.
country1 = pd.Series({
    'Name': '中国',
    'Language': 'Chinese',
    'Area': '9.597M km2',
    'Happiness Rank': 79,
})
country2 = pd.Series({
    'Name': '美国',
    'Language': 'English (US)',
    'Area': '9.834M km2',
    'Happiness Rank': 14,
})
country3 = pd.Series({
    'Name': '澳大利亚',
    'Language': 'English (AU)',
    'Area': '7.692M km2',
    'Happiness Rank': 9,
})

# Stack the three Series as rows and label each row with a country code.
df = pd.DataFrame(
    data=[country1, country2, country3],
    index=['CH', 'US', 'AU'],
)
print(df)Area  Happiness Rank      Language  Name
CH  9.597M km2              79       Chinese    中国
US  9.834M km2              14  English (US)    美国
AU  7.692M km2               9  English (AU)  澳大利亚# 添加数据
# Add new columns by assignment.
# A scalar value is broadcast to every row of the frame.
# A list must contain exactly one element per row; a list that is shorter or
# longer than the number of rows raises an error instead of being broadcast.
df['Location'] = '地球'
df['Region'] = ['亚洲', '北美洲', '大洋洲']
print(df)Name      Language        Area  Happiness Rank Location
CH    中国       Chinese  9.597M km2              79       地球
US    美国  English (US)  9.834M km2              14       地球
AU  澳大利亚  English (AU)  7.692M km2               9       地球Name      Language        Area  Happiness Rank Location Region
CH    中国       Chinese  9.597M km2              79       地球     亚洲
US    美国  English (US)  9.834M km2              14       地球    北美洲
AU  澳大利亚  English (AU)  7.692M km2               9       地球    大洋洲


# 行索引
Area              9.597M km2
Happiness Rank            79
Language             Chinese
Name                      中国
Location                  地球
Region                    亚洲
Name: CH, dtype: object<class 'pandas.core.series.Series'>iloc:
Area                9.834M km2
Happiness Rank              14
Language          English (US)
Name                        美国
Location                    地球
Region                     北美洲
Name: US, dtype: object<class 'pandas.core.series.Series'># 列索引
print(type(df['Area']))CH    9.597M km2
US    9.834M km2
AU    7.692M km2
Name: Area, dtype: object
<class 'pandas.core.series.Series'># 获取不连续的列数据
print(df[['Name', 'Area']])Name        Area
CH    中国  9.597M km2
US    美国  9.834M km2
AU  澳大利亚  7.692M km2# 混合索引
# 注意写法上的区别
9.597M km2
9.597M km2
9.597M km2
9.597M km2
9.597M km2# 转换行和列
print(df.T)CH            US            AU
Area            9.597M km2    9.834M km2    7.692M km2
Happiness Rank          79            14             9
Language           Chinese  English (US)  English (AU)
Name                    中国            美国          澳大利亚
Location                地球            地球            地球
Region                  亚洲           北美洲           大洋洲


# 注意drop操作只是将修改后的数据copy一份,而不会对原始数据进行修改
print(df)Area  Happiness Rank      Language  Name Location Region
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲Area  Happiness Rank      Language  Name Location Region
CH  9.597M km2              79       Chinese    中国       地球     亚洲
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲print(df.drop(['CH'], inplace=True))
# 如果使用了inplace=True,会在原始数据上进行修改,同时不会返回一个copy
print(df)NoneArea  Happiness Rank      Language  Name Location Region
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲#  如果需要删除列,需要指定axis=1
print(df.drop(['Area'], axis=1))
print(df)Happiness Rank      Language  Name Location Region
US              14  English (US)    美国       地球    北美洲
AU               9  English (AU)  澳大利亚       地球    大洋洲Area  Happiness Rank      Language  Name Location Region
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲# 也可直接使用del关键字
del df['Name']
print(df)Area  Happiness Rank      Language Location Region
US  9.834M km2              14  English (US)       地球    北美洲
AU  7.692M km2               9  English (AU)       地球    大洋洲


print(df['Happiness Rank'])US    14
AU     9
Name: Happiness Rank, dtype: int64# 注意从DataFrame中取出的数据进行操作后,会对原始数据产生影响
# `ranks` is taken from the frame without an explicit copy, so in the run shown
# here the in-place `+=` also changed df['Happiness Rank'] (14/9 -> 16/11).
# NOTE(review): with pandas Copy-on-Write enabled (the default from pandas 3.0)
# the selection behaves like a copy and df would stay unchanged - confirm
# against the pandas version in use.
ranks = df['Happiness Rank']
ranks += 2
print(df)US    16
AU    11
Name: Happiness Rank, dtype: int64Area  Happiness Rank      Language Location Region
US  9.834M km2              16  English (US)       地球    北美洲
AU  7.692M km2              11  English (AU)       地球    大洋洲# 注意从DataFrame中取出的数据进行操作后,会对原始数据产生影响
# The safe approach is to work on an explicit copy(): the in-place add below
# then only touches `ranks` and leaves df['Happiness Rank'] as it was.
ranks = df['Happiness Rank'].copy()
ranks += 2
print(df)US    18
AU    13
Name: Happiness Rank, dtype: int64Area  Happiness Rank      Language Location Region
US  9.834M km2              16  English (US)       地球    北美洲
AU  7.692M km2              11  English (AU)       地球    大洋洲# 加载csv文件数据
reprot_2015_df = pd.read_csv('./2015.csv')
print(reprot_2015_df.head())2015年数据预览:Country          Region  Happiness Rank  Happiness Score  \
0  Switzerland  Western Europe               1            7.587
1      Iceland  Western Europe               2            7.561
2      Denmark  Western Europe               3            7.527
3       Norway  Western Europe               4            7.522
4       Canada   North America               5            7.427   Standard Error  Economy (GDP per Capita)   Family  \
0         0.03411                   1.39651  1.34951
1         0.04884                   1.30232  1.40223
2         0.03328                   1.32548  1.36058
3         0.03880                   1.45900  1.33095
4         0.03553                   1.32629  1.32261   Health (Life Expectancy)  Freedom  Trust (Government Corruption)  \
0                   0.94143  0.66557                        0.41978
1                   0.94784  0.62877                        0.14145
2                   0.87464  0.64938                        0.48357
3                   0.88521  0.66973                        0.36503
4                   0.90563  0.63297                        0.32957   Generosity  Dystopia Residual
0     0.29678            2.51738
1     0.43630            2.70201
2     0.34139            2.49204
3     0.34699            2.46531
4     0.45811            2.45176  print(reprot_2015_df.info())<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
Country                          158 non-null object
Region                           158 non-null object
Happiness Rank                   158 non-null int64
Happiness Score                  158 non-null float64
Standard Error                   158 non-null float64
Economy (GDP per Capita)         158 non-null float64
Family                           158 non-null float64
Health (Life Expectancy)         158 non-null float64
Freedom                          158 non-null float64
Trust (Government Corruption)    158 non-null float64
Generosity                       158 non-null float64
Dystopia Residual                158 non-null float64
dtypes: float64(9), int64(1), object(2)
memory usage: 14.9+ KB
Noneprint(reprot_2015_df.describe())Happiness Rank  Happiness Score  Standard Error  \
count      158.000000       158.000000      158.000000
mean        79.493671         5.375734        0.047885
std         45.754363         1.145010        0.017146
min          1.000000         2.839000        0.018480
25%         40.250000         4.526000        0.037268
50%         79.500000         5.232500        0.043940
75%        118.750000         6.243750        0.052300
max        158.000000         7.587000        0.136930   Economy (GDP per Capita)      Family  Health (Life Expectancy)  \
count                158.000000  158.000000                158.000000
mean                   0.846137    0.991046                  0.630259
std                    0.403121    0.272369                  0.247078
min                    0.000000    0.000000                  0.000000
25%                    0.545808    0.856823                  0.439185
50%                    0.910245    1.029510                  0.696705
75%                    1.158448    1.214405                  0.811013
max                    1.690420    1.402230                  1.025250   Freedom  Trust (Government Corruption)  Generosity  \
count  158.000000                     158.000000  158.000000
mean     0.428615                       0.143422    0.237296
std      0.150693                       0.120034    0.126685
min      0.000000                       0.000000    0.000000
25%      0.328330                       0.061675    0.150553
50%      0.435515                       0.107220    0.216130
75%      0.549092                       0.180255    0.309883
max      0.669730                       0.551910    0.795880   Dystopia Residual
count         158.000000
mean            2.098977
std             0.553550
min             0.328580
25%             1.759410
50%             2.095415
75%             2.462415
max             3.602140  print(reprot_2015_df.tail())Country                           Region  Happiness Rank  \
153   Rwanda               Sub-Saharan Africa             154
154    Benin               Sub-Saharan Africa             155
155    Syria  Middle East and Northern Africa             156
156  Burundi               Sub-Saharan Africa             157
157     Togo               Sub-Saharan Africa             158   Happiness Score  Standard Error  Economy (GDP per Capita)   Family  \
153            3.465         0.03464                   0.22208  0.77370
154            3.340         0.03656                   0.28665  0.35386
155            3.006         0.05015                   0.66320  0.47489
156            2.905         0.08658                   0.01530  0.41587
157            2.839         0.06727                   0.20868  0.13995   Health (Life Expectancy)  Freedom  Trust (Government Corruption)  \
153                   0.42864  0.59201                        0.55191
154                   0.31910  0.48450                        0.08010
155                   0.72193  0.15684                        0.18906
156                   0.22396  0.11850                        0.10062
157                   0.28443  0.36453                        0.10731   Generosity  Dystopia Residual
153     0.22628            0.67042
154     0.18260            1.63328
155     0.47179            0.32858
156     0.19727            1.83302
157     0.16681            1.56726


import pandas as pd
# index_col selects the column that becomes the row index ('Country' here).
# usecols restricts which columns are read from the CSV file.
reprot_2016_df = pd.read_csv('F:/2016.csv', index_col='Country',usecols=['Country', 'Happiness Rank', 'Happiness Score', 'Region'])
print(reprot_2016_df.head())Region  Happiness Rank  Happiness Score
Denmark      Western Europe               1            7.526
Switzerland  Western Europe               2            7.509
Iceland      Western Europe               3            7.501
Norway       Western Europe               4            7.498
Finland      Western Europe               5            7.413print('列名(column):', reprot_2016_df.columns)
print('行名(index):', reprot_2016_df.index)列名(column): Index(['Region', 'Happiness Rank', 'Happiness Score'], dtype='object')
行名(index): Index(['Denmark', 'Switzerland', 'Iceland', 'Norway', 'Finland', 'Canada','Netherlands', 'New Zealand', 'Australia', 'Sweden',...'Madagascar', 'Tanzania', 'Liberia', 'Guinea', 'Rwanda', 'Benin','Afghanistan', 'Togo', 'Syria', 'Burundi'],dtype='object', name='Country', length=157)# 注意index是不可变的
reprot_2016_df.index[0] = '丹麦'TypeError                                 Traceback (most recent call last)
<ipython-input-8-c2f1f4f940d2> in <module>()1 # 注意index是不可变的
----> 2 reprot_2016_df.index[0] = '丹麦'F:\python\ANACONDA\lib\site-packages\pandas\core\indexes\base.py in __setitem__(self, key, value)2048 2049     def __setitem__(self, key, value):
-> 2050         raise TypeError("Index does not support mutable operations")2051 2052     def __getitem__(self, key):TypeError: Index does not support mutable operations# 重置index
# 注意inplace加与不加的区别
reprot_2016_df.reset_index(inplace=True)
print(reprot_2016_df.head())
Country          Region  Happiness Rank  Happiness Score
0      Denmark  Western Europe               1            7.526
1  Switzerland  Western Europe               2            7.509
2      Iceland  Western Europe               3            7.501
3       Norway  Western Europe               4            7.498
4      Finland  Western Europe               5            7.413# 重命名列名
# Rename the columns to Chinese labels. rename() returns a new frame, so the
# result is assigned back. Keys that do not match an existing column are
# silently ignored by default, so they must be spelled exactly as in the CSV
# ('Happiness ...', not 'Hapiness ...').
reprot_2016_df = reprot_2016_df.rename(columns={'Region': '地区', 'Happiness Rank': '排名', 'Happiness Score': '幸福指数'})
print(reprot_2016_df.head())
Country              地区  排名   幸福指数
0      Denmark  Western Europe   1  7.526
1  Switzerland  Western Europe   2  7.509
2      Iceland  Western Europe   3  7.501
3       Norway  Western Europe   4  7.498
4      Finland  Western Europe   5  7.413# 重命名列名,注意inplace的使用
reprot_2016_df.rename(columns={'Region': '地区', 'Happiness Rank': '排名', 'Happiness Score': '幸福指数'},inplace=True)
print(reprot_2016_df.head())Country              地区  排名   幸福指数
0      Denmark  Western Europe   1  7.526
1  Switzerland  Western Europe   2  7.509
2      Iceland  Western Europe   3  7.501
3       Norway  Western Europe   4  7.498
4      Finland  Western Europe   5  7.413

六.Boolean Mask

print(reprot_2016_df.head())Country              地区  排名   幸福指数
0      Denmark  Western Europe   1  7.526
1  Switzerland  Western Europe   2  7.509
2      Iceland  Western Europe   3  7.501
3       Norway  Western Europe   4  7.498
4      Finland  Western Europe   5  7.413# 过滤 Western Europe 地区的国家
# only_western_europe = reprot_2016_df['地区'] == 'Western Europe'
print(reprot_2016_df[reprot_2016_df['地区'] == 'Western Europe'])Country              地区  排名   幸福指数
0          Denmark  Western Europe   1  7.526
1      Switzerland  Western Europe   2  7.509
2          Iceland  Western Europe   3  7.501
3           Norway  Western Europe   4  7.498
4          Finland  Western Europe   5  7.413
6      Netherlands  Western Europe   7  7.339
9           Sweden  Western Europe  10  7.291
11         Austria  Western Europe  12  7.119
15         Germany  Western Europe  16  6.994
17         Belgium  Western Europe  18  6.929
18         Ireland  Western Europe  19  6.907
19      Luxembourg  Western Europe  20  6.871
22  United Kingdom  Western Europe  23  6.725
29           Malta  Western Europe  30  6.488
31          France  Western Europe  32  6.478
36           Spain  Western Europe  37  6.361
49           Italy  Western Europe  50  5.977
61    North Cyprus  Western Europe  62  5.771
68          Cyprus  Western Europe  69  5.546
93        Portugal  Western Europe  94  5.123
98          Greece  Western Europe  99  5.033# 过滤 Western Europe 地区的国家
# Boolean mask: the region is Western Europe AND the rank is greater than 10.
# Each comparison is parenthesised because `&` binds tighter than `==` and `>`.
only_western_europe_10 = (reprot_2016_df['地区'] == 'Western Europe') & (reprot_2016_df['排名'] > 10)
print(only_western_europe_10)0      False
1      False
2      False
3      False
4      False
5      False
6      False
7      False
8      False
9      False
10     False
11      True
12     False
13     False
14     False
15      True
16     False
17      True
18      True
19      True
20     False
21     False
22      True
23     False
24     False
25     False
26     False
27     False
28     False
29      True...
127    False
128    False
129    False
130    False
131    False
132    False
133    False
134    False
135    False
136    False
137    False
138    False
139    False
140    False
141    False
142    False
143    False
144    False
145    False
146    False
147    False
148    False
149    False
150    False
151    False
152    False
153    False
154    False
155    False
156    False
Length: 157, dtype: bool# 叠加 boolean mask 得到最终结果
print(reprot_2016_df[only_western_europe_10])Country              地区  排名   幸福指数
11         Austria  Western Europe  12  7.119
15         Germany  Western Europe  16  6.994
17         Belgium  Western Europe  18  6.929
18         Ireland  Western Europe  19  6.907
19      Luxembourg  Western Europe  20  6.871
22  United Kingdom  Western Europe  23  6.725
29           Malta  Western Europe  30  6.488
31          France  Western Europe  32  6.478
36           Spain  Western Europe  37  6.361
49           Italy  Western Europe  50  5.977
61    North Cyprus  Western Europe  62  5.771
68          Cyprus  Western Europe  69  5.546
93        Portugal  Western Europe  94  5.123
98          Greece  Western Europe  99  5.033# 熟练以后可以写在一行中
print(reprot_2016_df[(reprot_2016_df['地区'] == 'Western Europe') & (reprot_2016_df['排名'] > 10)])Country              地区  排名   幸福指数
11         Austria  Western Europe  12  7.119
15         Germany  Western Europe  16  6.994
17         Belgium  Western Europe  18  6.929
18         Ireland  Western Europe  19  6.907
19      Luxembourg  Western Europe  20  6.871
22  United Kingdom  Western Europe  23  6.725
29           Malta  Western Europe  30  6.488
31          France  Western Europe  32  6.478
36           Spain  Western Europe  37  6.361
49           Italy  Western Europe  50  5.977
61    North Cyprus  Western Europe  62  5.771
68          Cyprus  Western Europe  69  5.546
93        Portugal  Western Europe  94  5.123
98          Greece  Western Europe  99  5.033


print(reprot_2015_df.head())Country          Region  Happiness Rank  Happiness Score  \
0  Switzerland  Western Europe               1            7.587
1      Iceland  Western Europe               2            7.561
2      Denmark  Western Europe               3            7.527
3       Norway  Western Europe               4            7.522
4       Canada   North America               5            7.427   Standard Error  Economy (GDP per Capita)   Family  \
0         0.03411                   1.39651  1.34951
1         0.04884                   1.30232  1.40223
2         0.03328                   1.32548  1.36058
3         0.03880                   1.45900  1.33095
4         0.03553                   1.32629  1.32261   Health (Life Expectancy)  Freedom  Trust (Government Corruption)  \
0                   0.94143  0.66557                        0.41978
1                   0.94784  0.62877                        0.14145
2                   0.87464  0.64938                        0.48357
3                   0.88521  0.66973                        0.36503
4                   0.90563  0.63297                        0.32957   Generosity  Dystopia Residual
0     0.29678            2.51738
1     0.43630            2.70201
2     0.34139            2.49204
3     0.34699            2.46531
4     0.45811            2.45176  # 设置层级索引
# Build a two-level (hierarchical) row index: Region first, then Country.
report_2015_df2 = reprot_2015_df.set_index(['Region', 'Country'])
first_rows = report_2015_df2.head(20)
print(first_rows)

# Select with the level-0 (Region) label only.
western_europe = report_2015_df2.loc['Western Europe']
print(western_europe)

# Select with both index levels (Region, Country).
switzerland = report_2015_df2.loc['Western Europe', 'Switzerland']
print(switzerland)

# Swap the order of the index levels (the code for this step is not shown here).


