第1章 Pandas基础
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
查看Pandas版本
pd.__version__
'1.0.3'
pd.__version__
'1.0.3'
一、文件读取与写入
1. 读取
(a)csv格式
df = pd.read_csv('data/table.csv')
df.head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
1
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
32.5
|
B+
|
2
|
S_1
|
C_1
|
1103
|
M
|
street_2
|
186
|
82
|
87.2
|
B+
|
3
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
80.4
|
B-
|
4
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
84.8
|
B+
|
df=pd.read_csv('data/table.csv')
df
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
1
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
32.5
|
B+
|
2
|
S_1
|
C_1
|
1103
|
M
|
street_2
|
186
|
82
|
87.2
|
B+
|
3
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
80.4
|
B-
|
4
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
84.8
|
B+
|
5
|
S_1
|
C_2
|
1201
|
M
|
street_5
|
188
|
68
|
97.0
|
A-
|
6
|
S_1
|
C_2
|
1202
|
F
|
street_4
|
176
|
94
|
63.5
|
B-
|
7
|
S_1
|
C_2
|
1203
|
M
|
street_6
|
160
|
53
|
58.8
|
A+
|
8
|
S_1
|
C_2
|
1204
|
F
|
street_5
|
162
|
63
|
33.8
|
B
|
9
|
S_1
|
C_2
|
1205
|
F
|
street_6
|
167
|
63
|
68.4
|
B-
|
10
|
S_1
|
C_3
|
1301
|
M
|
street_4
|
161
|
68
|
31.5
|
B+
|
11
|
S_1
|
C_3
|
1302
|
F
|
street_1
|
175
|
57
|
87.7
|
A-
|
12
|
S_1
|
C_3
|
1303
|
M
|
street_7
|
188
|
82
|
49.7
|
B
|
13
|
S_1
|
C_3
|
1304
|
M
|
street_2
|
195
|
70
|
85.2
|
A
|
14
|
S_1
|
C_3
|
1305
|
F
|
street_5
|
187
|
69
|
61.7
|
B-
|
15
|
S_2
|
C_1
|
2101
|
M
|
street_7
|
174
|
84
|
83.3
|
C
|
16
|
S_2
|
C_1
|
2102
|
F
|
street_6
|
161
|
61
|
50.6
|
B+
|
17
|
S_2
|
C_1
|
2103
|
M
|
street_4
|
157
|
61
|
52.5
|
B-
|
18
|
S_2
|
C_1
|
2104
|
F
|
street_5
|
159
|
97
|
72.2
|
B+
|
19
|
S_2
|
C_1
|
2105
|
M
|
street_4
|
170
|
81
|
34.2
|
A
|
20
|
S_2
|
C_2
|
2201
|
M
|
street_5
|
193
|
100
|
39.1
|
B
|
21
|
S_2
|
C_2
|
2202
|
F
|
street_7
|
194
|
77
|
68.5
|
B+
|
22
|
S_2
|
C_2
|
2203
|
M
|
street_4
|
155
|
91
|
73.8
|
A+
|
23
|
S_2
|
C_2
|
2204
|
M
|
street_1
|
175
|
74
|
47.2
|
B-
|
24
|
S_2
|
C_2
|
2205
|
F
|
street_7
|
183
|
76
|
85.4
|
B
|
25
|
S_2
|
C_3
|
2301
|
F
|
street_4
|
157
|
78
|
72.3
|
B+
|
26
|
S_2
|
C_3
|
2302
|
M
|
street_5
|
171
|
88
|
32.7
|
A
|
27
|
S_2
|
C_3
|
2303
|
F
|
street_7
|
190
|
99
|
65.9
|
C
|
28
|
S_2
|
C_3
|
2304
|
F
|
street_6
|
164
|
81
|
95.5
|
A-
|
29
|
S_2
|
C_3
|
2305
|
M
|
street_4
|
187
|
73
|
48.9
|
B
|
30
|
S_2
|
C_4
|
2401
|
F
|
street_2
|
192
|
62
|
45.3
|
A
|
31
|
S_2
|
C_4
|
2402
|
M
|
street_7
|
166
|
82
|
48.7
|
B
|
32
|
S_2
|
C_4
|
2403
|
F
|
street_6
|
158
|
60
|
59.7
|
B+
|
33
|
S_2
|
C_4
|
2404
|
F
|
street_2
|
160
|
84
|
67.7
|
B
|
34
|
S_2
|
C_4
|
2405
|
F
|
street_6
|
193
|
54
|
47.6
|
B
|
(b)txt格式
df_txt = pd.read_table('data/table.txt') #可设置sep分隔符参数
df_txt
|
col1
|
col2
|
col3
|
col4
|
0
|
2
|
a
|
1.4
|
apple
|
1
|
3
|
b
|
3.4
|
banana
|
2
|
6
|
c
|
2.5
|
orange
|
3
|
5
|
d
|
3.2
|
lemon
|
df_txt=pd.read_table('data/table.txt')
df_txt.head()
|
col1
|
col2
|
col3
|
col4
|
0
|
2
|
a
|
1.4
|
apple
|
1
|
3
|
b
|
3.4
|
banana
|
2
|
6
|
c
|
2.5
|
orange
|
3
|
5
|
d
|
3.2
|
lemon
|
(c)xls或xlsx格式
#需要安装xlrd包
df_excel = pd.read_excel('data/table.xlsx')
df_excel.head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
1
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
32.5
|
B+
|
2
|
S_1
|
C_1
|
1103
|
M
|
street_2
|
186
|
82
|
87.2
|
B+
|
3
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
80.4
|
B-
|
4
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
84.8
|
B+
|
df_excel=pd.read_excel('data/table.xlsx')
df_excel.head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
1
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
32.5
|
B+
|
2
|
S_1
|
C_1
|
1103
|
M
|
street_2
|
186
|
82
|
87.2
|
B+
|
3
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
80.4
|
B-
|
4
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
84.8
|
B+
|
2. 写入
(a)csv格式
df.to_csv('data/new_table.csv')
#df.to_csv('data/new_table.csv', index=False) #保存时除去行索引
df.to_csv('data/new_table.csv')
(b)xls或xlsx格式
#需要安装openpyxl
df.to_excel('data/new_table2.xlsx', sheet_name='Sheet1')
df.to_excel('data/new_table2.xlsx',sheet_name='Sheet1')
二、基本数据结构
1. Series
(a)创建一个Series
对于一个Series,其中最常用的属性为值(values),索引(index),名字(name),类型(dtype)
s = pd.Series(np.random.randn(5),index=['a','b','c','d','e'],name='这是一个Series',dtype='float64')
s
a 0.302995
b 0.573438
c 0.536086
d 0.513209
e -1.263579
Name: 这是一个Series, dtype: float64
s=pd.Series(np.random.randn(5),index=['a','b','c','d','e'],name='这是一个Series',dtype='float64')
s
a -0.314615
b -1.307312
c 0.721136
d 1.841850
e 0.521665
Name: 这是一个Series, dtype: float64
(b)访问Series属性
s.values
s.values
array([-0.31461451, -1.3073125 , 0.7211358 , 1.84184984, 0.52166547])
s.name
s.name
'这是一个Series'
s.index
s.index
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
s.dtype
s.dtype
dtype('float64')
(c)取出某一个元素
将在第2章详细讨论索引的应用,这里先大致了解
s['a']
s['b']
-1.3073124966290814
(d)调用方法
s.mean()
0.1324296778903958
Series有相当多的方法可以调用:
print([attr for attr in dir(s) if not attr.startswith('_')])
['T', 'a', 'abs', 'add', 'add_prefix', 'add_suffix', 'agg', 'aggregate', 'align', 'all', 'any', 'append', 'apply', 'argmax', 'argmin', 'argsort', 'array', 'asfreq', 'asof', 'astype', 'at', 'at_time', 'attrs', 'autocorr', 'axes', 'b', 'between', 'between_time', 'bfill', 'bool', 'c', 'clip', 'combine', 'combine_first', 'convert_dtypes', 'copy', 'corr', 'count', 'cov', 'cummax', 'cummin', 'cumprod', 'cumsum', 'd', 'describe', 'diff', 'div', 'divide', 'divmod', 'dot', 'drop', 'drop_duplicates', 'droplevel', 'dropna', 'dtype', 'dtypes', 'duplicated', 'e', 'empty', 'eq', 'equals', 'ewm', 'expanding', 'explode', 'factorize', 'ffill', 'fillna', 'filter', 'first', 'first_valid_index', 'floordiv', 'ge', 'get', 'groupby', 'gt', 'hasnans', 'head', 'hist', 'iat', 'idxmax', 'idxmin', 'iloc', 'index', 'infer_objects', 'interpolate', 'is_monotonic', 'is_monotonic_decreasing', 'is_monotonic_increasing', 'is_unique', 'isin', 'isna', 'isnull', 'item', 'items', 'iteritems', 'keys', 'kurt', 'kurtosis', 'last', 'last_valid_index', 'le', 'loc', 'lt', 'mad', 'map', 'mask', 'max', 'mean', 'median', 'memory_usage', 'min', 'mod', 'mode', 'mul', 'multiply', 'name', 'nbytes', 'ndim', 'ne', 'nlargest', 'notna', 'notnull', 'nsmallest', 'nunique', 'pct_change', 'pipe', 'plot', 'pop', 'pow', 'prod', 'product', 'quantile', 'radd', 'rank', 'ravel', 'rdiv', 'rdivmod', 'reindex', 'reindex_like', 'rename', 'rename_axis', 'reorder_levels', 'repeat', 'replace', 'resample', 'reset_index', 'rfloordiv', 'rmod', 'rmul', 'rolling', 'round', 'rpow', 'rsub', 'rtruediv', 'sample', 'searchsorted', 'sem', 'set_axis', 'shape', 'shift', 'size', 'skew', 'slice_shift', 'sort_index', 'sort_values', 'squeeze', 'std', 'sub', 'subtract', 'sum', 'swapaxes', 'swaplevel', 'tail', 'take', 'to_clipboard', 'to_csv', 'to_dict', 'to_excel', 'to_frame', 'to_hdf', 'to_json', 'to_latex', 'to_list', 'to_markdown', 'to_numpy', 'to_period', 'to_pickle', 'to_sql', 'to_string', 'to_timestamp', 'to_xarray', 'transform', 'transpose', 
'truediv', 'truncate', 'tshift', 'tz_convert', 'tz_localize', 'unique', 'unstack', 'update', 'value_counts', 'values', 'var', 'view', 'where', 'xs']
2. DataFrame
(a)创建一个DataFrame
df = pd.DataFrame({'col1':list('abcde'),'col2':range(5,10),'col3':[1.3,2.5,3.6,4.6,5.8]},index=list('一二三四五'))
df
|
col1
|
col2
|
col3
|
一
|
a
|
5
|
1.3
|
二
|
b
|
6
|
2.5
|
三
|
c
|
7
|
3.6
|
四
|
d
|
8
|
4.6
|
五
|
e
|
9
|
5.8
|
df=pd.DataFrame({'col1':list('abcde'),'col2':range(5,10),'col3':[1.3,2.5,3.6,4.6,5.8]},index=list('一二三四五'))
df
|
col1
|
col2
|
col3
|
一
|
a
|
5
|
1.3
|
二
|
b
|
6
|
2.5
|
三
|
c
|
7
|
3.6
|
四
|
d
|
8
|
4.6
|
五
|
e
|
9
|
5.8
|
(b)从DataFrame取出一列为Series
df['col1']
df['col1']
一 a
二 b
三 c
四 d
五 e
Name: col1, dtype: object
type(df)
type(df)
pandas.core.frame.DataFrame
type(df['col1'])
type(df['col1'])
pandas.core.series.Series
(c)修改行或列名
df.rename(index={'一':'one'},columns={'col1':'new_col1'})
df.rename(index={'一':'one'},columns={'col1':'new_col1'})
|
new_col1
|
col2
|
col3
|
one
|
a
|
5
|
1.3
|
二
|
b
|
6
|
2.5
|
三
|
c
|
7
|
3.6
|
四
|
d
|
8
|
4.6
|
五
|
e
|
9
|
5.8
|
(d)调用属性和方法
df.index
df.index
Index(['一', '二', '三', '四', '五'], dtype='object')
df.columns
df.columns
Index(['col1', 'col2', 'col3'], dtype='object')
df.values
df.values
array([['a', 5, 1.3],['b', 6, 2.5],['c', 7, 3.6],['d', 8, 4.6],['e', 9, 5.8]], dtype=object)
df.shape
df.shape
(5, 3)
df.mean() #本质上是一种Aggregation操作,将在第3章详细介绍
df.mean()
col2 7.00
col3 3.56
dtype: float64
(e)索引对齐特性
这是Pandas中非常强大的特性,不理解这一特性有时就会造成一些麻烦
df1=pd.DataFrame({'A':[1,2,3]},index=[1,2,3])
df2=pd.DataFrame({'A':[1,2,3]},index=[1,3,2])
df1-df2
df1 = pd.DataFrame({'A':[1,2,3]},index=[1,2,3])
df2 = pd.DataFrame({'A':[1,2,3]},index=[3,1,2])
df1-df2 #由于索引对齐,因此结果不是0
(f)列的删除与添加
对于删除而言,可以使用drop函数或del或pop
df.drop(index='五',columns='col1') #设置inplace=True后会直接在原DataFrame中改动
|
col2
|
col3
|
一
|
5
|
1.3
|
二
|
6
|
2.5
|
三
|
7
|
3.6
|
四
|
8
|
4.6
|
# Same deletion done in two steps: drop the row first, then the column.
# None of these calls modifies `df`; each returns a new DataFrame.
test = df.drop(index='五')
test.drop(columns='col1')
# One-step equivalent of the two calls above.
df.drop(index='五', columns='col1')
# `test` still contains 'col1' because the second drop was not assigned back.
test.head()
|
col1
|
col2
|
col3
|
一
|
a
|
5
|
1.3
|
二
|
b
|
6
|
2.5
|
三
|
c
|
7
|
3.6
|
四
|
d
|
8
|
4.6
|
五
|
e
|
9
|
5.8
|
df['col1']=[1,2,3,4,5]
del df['col1']
df
|
col2
|
col3
|
一
|
5
|
1.3
|
二
|
6
|
2.5
|
三
|
7
|
3.6
|
四
|
8
|
4.6
|
五
|
9
|
5.8
|
pop方法直接在原来的DataFrame上操作,且返回被删除的列,与python中的pop函数类似
df['col1']=[1,2,3,4,5]
df.pop('col1')
一 1
二 2
三 3
四 4
五 5
Name: col1, dtype: int64
df['col1']=[1,2,3,4,5]
df.pop('col1')
一 1
二 2
三 3
四 4
五 5
Name: col1, dtype: int64
df
|
col2
|
col3
|
一
|
5
|
1.3
|
二
|
6
|
2.5
|
三
|
7
|
3.6
|
四
|
8
|
4.6
|
五
|
9
|
5.8
|
可以直接增加新的列,也可以使用assign方法
df1['B']=list('abc')
df1['B']=list('abc')
df1
df1.assign(C=pd.Series(list('def'),index=[1,2,3]))
#pd.Series(list('def'))
但assign方法不会对原DataFrame做修改
df1
(g)根据类型选择列
df
|
col2
|
col3
|
一
|
5
|
1.3
|
二
|
6
|
2.5
|
三
|
7
|
3.6
|
四
|
8
|
4.6
|
五
|
9
|
5.8
|
df.select_dtypes(include=['number']).head()
df.select_dtypes(include=['number']).head()
|
col2
|
col3
|
一
|
5
|
1.3
|
二
|
6
|
2.5
|
三
|
7
|
3.6
|
四
|
8
|
4.6
|
五
|
9
|
5.8
|
df.select_dtypes(include=['float']).head()
df.select_dtypes(include=['object']).head()
(h)将Series转换为DataFrame
s = df.mean()
s.name='to_DataFrame'
s
col2 7.00
col3 3.56
Name: to_DataFrame, dtype: float64
s=df.mean()
s.name='to_DataFrame'
s
col2 7.00
col3 3.56
Name: to_DataFrame, dtype: float64
s.to_frame()
|
to_DataFrame
|
col2
|
7.00
|
col3
|
3.56
|
使用T符号可以转置
s.to_frame().T
s.to_frame().T
|
col2
|
col3
|
to_DataFrame
|
7.0
|
3.56
|
三、常用基本函数
从下面开始,包括后面所有章节,我们都会用到这份虚拟的数据集
df = pd.read_csv('data/table.csv')
df=pd.read_csv('data/table.csv')
1. head和tail
df.head()
df.head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
1
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
32.5
|
B+
|
2
|
S_1
|
C_1
|
1103
|
M
|
street_2
|
186
|
82
|
87.2
|
B+
|
3
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
80.4
|
B-
|
4
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
84.8
|
B+
|
df.tail()
df.tail()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
30
|
S_2
|
C_4
|
2401
|
F
|
street_2
|
192
|
62
|
45.3
|
A
|
31
|
S_2
|
C_4
|
2402
|
M
|
street_7
|
166
|
82
|
48.7
|
B
|
32
|
S_2
|
C_4
|
2403
|
F
|
street_6
|
158
|
60
|
59.7
|
B+
|
33
|
S_2
|
C_4
|
2404
|
F
|
street_2
|
160
|
84
|
67.7
|
B
|
34
|
S_2
|
C_4
|
2405
|
F
|
street_6
|
193
|
54
|
47.6
|
B
|
可以指定n参数显示多少行
df.head(3)
df.head(1)
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
2. unique和nunique
nunique显示有多少个唯一值
df['Physics'].nunique()
df['ID'].nunique()
35
unique显示所有的唯一值
df['Physics'].unique()
df['ID'].unique()
array([1101, 1102, 1103, 1104, 1105, 1201, 1202, 1203, 1204, 1205, 1301,1302, 1303, 1304, 1305, 2101, 2102, 2103, 2104, 2105, 2201, 2202,2203, 2204, 2205, 2301, 2302, 2303, 2304, 2305, 2401, 2402, 2403,2404, 2405], dtype=int64)
3. count和value_counts
count返回非缺失值元素个数
df['Physics'].count()
df['ID'].count()
35
value_counts返回每个元素有多少个
df['Physics'].value_counts()
df['Physics'].value_counts()
B+ 9
B 8
B- 6
A 4
A+ 3
A- 3
C 2
Name: Physics, dtype: int64
4. describe和info
info函数返回有哪些列、有多少非缺失值、每列的类型
df.info()
#df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   School   35 non-null     object
 1   Class    35 non-null     object
 2   ID       35 non-null     int64
 3   Gender   35 non-null     object
 4   Address  35 non-null     object
 5   Height   35 non-null     int64
 6   Weight   35 non-null     int64
 7   Math     35 non-null     float64
 8   Physics  35 non-null     object
dtypes: float64(1), int64(3), object(5)
memory usage: 2.6+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   School   35 non-null     object
 1   Class    35 non-null     object
 2   ID       35 non-null     int64
 3   Gender   35 non-null     object
 4   Address  35 non-null     object
 5   Height   35 non-null     int64
 6   Weight   35 non-null     int64
 7   Math     35 non-null     float64
 8   Physics  35 non-null     object
dtypes: float64(1), int64(3), object(5)
memory usage: 2.6+ KB
describe默认统计数值型数据的各个统计量
df.describe()
df.describe()
|
ID
|
Height
|
Weight
|
Math
|
count
|
35.00000
|
35.000000
|
35.000000
|
35.000000
|
mean
|
1803.00000
|
174.142857
|
74.657143
|
61.351429
|
std
|
536.87741
|
13.541098
|
12.895377
|
19.915164
|
min
|
1101.00000
|
155.000000
|
53.000000
|
31.500000
|
25%
|
1204.50000
|
161.000000
|
63.000000
|
47.400000
|
50%
|
2103.00000
|
173.000000
|
74.000000
|
61.700000
|
75%
|
2301.50000
|
187.500000
|
82.000000
|
77.100000
|
max
|
2405.00000
|
195.000000
|
100.000000
|
97.000000
|
可以自行选择分位数
df.describe(percentiles=[.05, .25, .75, .95])
df.describe(percentiles=[.05,0.25,.85])
|
ID
|
Height
|
Weight
|
Math
|
count
|
35.00000
|
35.000000
|
35.000000
|
35.000000
|
mean
|
1803.00000
|
174.142857
|
74.657143
|
61.351429
|
std
|
536.87741
|
13.541098
|
12.895377
|
19.915164
|
min
|
1101.00000
|
155.000000
|
53.000000
|
31.500000
|
5%
|
1102.70000
|
157.000000
|
56.100000
|
32.640000
|
25%
|
1204.50000
|
161.000000
|
63.000000
|
47.400000
|
50%
|
2103.00000
|
173.000000
|
74.000000
|
61.700000
|
85%
|
2304.90000
|
191.800000
|
87.600000
|
85.160000
|
max
|
2405.00000
|
195.000000
|
100.000000
|
97.000000
|
对于非数值型也可以用describe函数
df['Physics'].describe()
df['Physics'].describe()
count 35
unique 7
top B+
freq 9
Name: Physics, dtype: object
5. idxmax和nlargest
idxmax函数返回最大值所在的索引(而不是最大值本身),在某些情况下特别适用,idxmin功能类似
df['Math'].idxmax()
df['Math'].idxmax()
#df['Math'].max()
5
nlargest函数返回前几个大的元素值,nsmallest功能类似
df['Math'].nlargest(3)
df['Math'].nlargest(5)
5 97.0
28 95.5
11 87.7
2 87.2
24 85.4
Name: Math, dtype: float64
6. clip和replace
clip和replace是两类替换函数
clip是对超过或者低于某些值的数进行截断
df['Math'].head()
df['Math'].head()
0 34.0
1 32.5
2 87.2
3 80.4
4 84.8
Name: Math, dtype: float64
df['Math'].clip(33,80).head()
df['Math'].clip(33,80).head()
0 34.0
1 33.0
2 80.0
3 80.0
4 80.0
Name: Math, dtype: float64
df['Math'].mad()
df['Math'].clip(33,80).mad()
15.021387755102042
replace是对某些值进行替换
df['Address'].head()
df['Address'].head()
0 street_1
1 street_2
2 street_2
3 street_2
4 street_4
Name: Address, dtype: object
df['Address'].replace(['street_1','street_2'],['one','two']).head()
df['Address'].replace(['street_1','street_2'],['one1','two']).head()
0 one1
1 two
2 two
3 two
4 street_4
Name: Address, dtype: object
通过字典,可以直接对表中指定的列进行替换(replace返回新的DataFrame,原表不会被修改)
df.replace({'Address':{'street_1':'one','street_2':'two'}}).head()
df.replace({'Address':{'street_1':'one','street_2':'two'}}).head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
one
|
173
|
63
|
34.0
|
A+
|
1
|
S_1
|
C_1
|
1102
|
F
|
two
|
192
|
73
|
32.5
|
B+
|
2
|
S_1
|
C_1
|
1103
|
M
|
two
|
186
|
82
|
87.2
|
B+
|
3
|
S_1
|
C_1
|
1104
|
F
|
two
|
167
|
81
|
80.4
|
B-
|
4
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
84.8
|
B+
|
7. apply函数
apply是一个自由度很高的函数,在第3章我们还要提到
对于Series,它会对每一个元素的值逐个进行操作:
df['Math'].apply(lambda x:str(x)+'!').head() #可以使用lambda表达式,也可以使用函数
df['Math'].apply(lambda x:str(x)+'!').head()
0 34.0!
1 32.5!
2 87.2!
3 80.4!
4 84.8!
Name: Math, dtype: object
对于DataFrame,它默认逐列进行操作(每次传入函数的是一列Series):
df.apply(lambda x:x.apply(lambda x:str(x)+'!')).head() #这是一个稍显复杂的例子,有利于理解apply的功能
df.apply(lambda x:x.apply(lambda x:str(x)+'!')).head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1!
|
C_1!
|
1101!
|
M!
|
street_1!
|
173!
|
63!
|
34.0!
|
A+!
|
1
|
S_1!
|
C_1!
|
1102!
|
F!
|
street_2!
|
192!
|
73!
|
32.5!
|
B+!
|
2
|
S_1!
|
C_1!
|
1103!
|
M!
|
street_2!
|
186!
|
82!
|
87.2!
|
B+!
|
3
|
S_1!
|
C_1!
|
1104!
|
F!
|
street_2!
|
167!
|
81!
|
80.4!
|
B-!
|
4
|
S_1!
|
C_1!
|
1105!
|
F!
|
street_4!
|
159!
|
64!
|
84.8!
|
B+!
|
四、排序
1. 索引排序
df.set_index('Math').head() #set_index函数可以设置索引,将在下一章详细介绍
df.set_index('Math').head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Physics
|
Math
|
|
|
|
|
|
|
|
|
34.0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
A+
|
32.5
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
B+
|
87.2
|
S_1
|
C_1
|
1103
|
M
|
street_2
|
186
|
82
|
B+
|
80.4
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
B-
|
84.8
|
S_1
|
C_1
|
1105
|
F
|
street_4
|
159
|
64
|
B+
|
df.set_index('Math').sort_index().head() #可以设置ascending参数,默认为升序,True
df.set_index('Math').sort_index().head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Physics
|
Math
|
|
|
|
|
|
|
|
|
31.5
|
S_1
|
C_3
|
1301
|
M
|
street_4
|
161
|
68
|
B+
|
32.5
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
B+
|
32.7
|
S_2
|
C_3
|
2302
|
M
|
street_5
|
171
|
88
|
A
|
33.8
|
S_1
|
C_2
|
1204
|
F
|
street_5
|
162
|
63
|
B
|
34.0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
A+
|
2. 值排序
df.sort_values(by='Class').head()
df.sort_values(by='Math').head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
10
|
S_1
|
C_3
|
1301
|
M
|
street_4
|
161
|
68
|
31.5
|
B+
|
1
|
S_1
|
C_1
|
1102
|
F
|
street_2
|
192
|
73
|
32.5
|
B+
|
26
|
S_2
|
C_3
|
2302
|
M
|
street_5
|
171
|
88
|
32.7
|
A
|
8
|
S_1
|
C_2
|
1204
|
F
|
street_5
|
162
|
63
|
33.8
|
B
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
多个值排序,即先对第一层排,在第一层相同的情况下对第二层排序
df.sort_values(by=['Address','Height']).head()
df.sort_values(by=['Address','Height']).head()
|
School
|
Class
|
ID
|
Gender
|
Address
|
Height
|
Weight
|
Math
|
Physics
|
0
|
S_1
|
C_1
|
1101
|
M
|
street_1
|
173
|
63
|
34.0
|
A+
|
11
|
S_1
|
C_3
|
1302
|
F
|
street_1
|
175
|
57
|
87.7
|
A-
|
23
|
S_2
|
C_2
|
2204
|
M
|
street_1
|
175
|
74
|
47.2
|
B-
|
33
|
S_2
|
C_4
|
2404
|
F
|
street_2
|
160
|
84
|
67.7
|
B
|
3
|
S_1
|
C_1
|
1104
|
F
|
street_2
|
167
|
81
|
80.4
|
B-
|
五、问题与练习
1. 问题
【问题一】 Series和DataFrame有哪些常见属性和方法?
【问题二】 value_counts会统计缺失值吗?
【问题三】 与idxmax和nlargest功能相反的是哪两组函数?
【问题四】 在常用函数一节中,由于一些函数的功能比较简单,因此没有列入,现在将它们列在下面,请分别说明它们的用途并尝试使用。
sum/mean/median/mad/min/max/abs/std/var/quantile/cummax/cumsum/cumprod
【问题五】 df.mean(axis=1)是什么意思?它与df.mean()的结果一样吗?第一问提到的函数也有axis参数吗?怎么使用?
2. 练习
【练习一】 现有一份关于美剧《权力的游戏》剧本的数据集,请解决以下问题:
(a)在所有的数据中,一共出现了多少人物?
(b)以单元格计数(即简单把一个单元格视作一句),谁说了最多的话?
(c)以单词计数,谁说了最多的单词?
(a)
df=pd.read_csv('data/Game_of_Thrones_Script.csv')
pd.read_csv('data/Game_of_Thrones_Script.csv').head()
|
Release Date
|
Season
|
Episode
|
Episode Title
|
Name
|
Sentence
|
0
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
waymar royce
|
What do you expect? They're savages. One lot s...
|
1
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
will
|
I've never seen wildlings do a thing like this...
|
2
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
waymar royce
|
How close did you get?
|
3
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
will
|
Close as any man would.
|
4
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
gared
|
We should head back to the wall.
|
df['Name'].nunique()
564
(b)
df['Name'].value_counts().nlargest(1)
tyrion lannister 1760
Name: Name, dtype: int64
(c)
df_words=df.assign(Words=df['Sentence'].apply(lambda x:len(x.split())))
df_words.head()
|
Release Date
|
Season
|
Episode
|
Episode Title
|
Name
|
Sentence
|
Words
|
0
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
waymar royce
|
What do you expect? They're savages. One lot s...
|
25
|
1
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
will
|
I've never seen wildlings do a thing like this...
|
21
|
2
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
waymar royce
|
How close did you get?
|
5
|
3
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
will
|
Close as any man would.
|
5
|
4
|
2011/4/17
|
Season 1
|
Episode 1
|
Winter is Coming
|
gared
|
We should head back to the wall.
|
7
|
法一
df_words.groupby('Name')['Words'].sum().sort_values().tail(1)
Name
tyrion lannister 26009
Name: Words, dtype: int64
法二
# Method 2: accumulate each speaker's total word count in a plain dict,
# then pick the speaker with the largest total.
L_count = {}
N_words = list(zip(df_words['Name'], df_words['Words']))
for name, words in N_words:
    # dict.get supplies 0 the first time a speaker is seen.
    L_count[name] = L_count.get(name, 0) + words
# max over the dict keys, ranked by their accumulated totals.
max_name = max(L_count, key=L_count.get)
print(max_name)
tyrion lannister
法三
# Method 3: build the same per-speaker totals, then list every speaker
# ordered from most to fewest words.
L_count = {}
N_words = list(zip(df_words['Name'], df_words['Words']))
for name, words in N_words:
    # dict.get supplies 0 the first time a speaker is seen.
    L_count[name] = L_count.get(name, 0) + words
# Sort the (name, total) pairs by total, largest first.
sorted(L_count.items(), key=(lambda pair: pair[1]), reverse=True)
[('tyrion lannister', 26009),('cersei lannister', 14442),('daenerys targaryen', 12358),('jon snow', 12298),('jaime lannister', 11735),('sansa stark', 8135),('petyr baelish', 7101),('davos', 6842),('arya stark', 6448),('varys', 6397),('tywin lannister', 5493),('theon greyjoy', 5054),('sam', 4574),('bronn', 4354),('jorah mormont', 4271),('brienne', 3923),('stannis baratheon', 3674),('robb stark', 3625),('olenna tyrell', 3320),('catelyn stark', 3303),('bran stark', 3296),('melisandre', 3283),('eddard stark', 3241),('ramsay bolton', 3229),('margaery tyrell', 3155),('joffrey lannister', 3024),('sandor clegane', 2760),('sparrow', 2700),('man', 2570),('robert baratheon', 2396),('daario', 2246),('ygritte', 2184),('tormund', 2037),('gendry baratheon', 1996),('missandei', 1953),('sam tarly', 1941),('oberyn martell', 1906),('yara greyjoy', 1840),('shae', 1803),('osha', 1660),('gilly', 1491),('roose', 1414),('jaqen hghar', 1406),('tommen lannister', 1381),('qyburn', 1321),('talisa', 1315),('podrick', 1295),('euron greyjoy', 1294),('grey worm', 1231),('mance', 1209),('thoros', 1177),('sandor', 1142),('beric', 1109),('alliser thorne', 1086),('walder', 1051),('shireen', 1029),('barristan', 942),('hot pie', 938),('yoren', 922),('marwyn', 895),('pycelle', 842),('lysa', 834),('xaro', 822),('loras', 813),('grand maester pycelle', 796),('qhorin', 793),('hizdahr', 790),('balon', 785),('viserys targaryen', 775),('luwin', 745),('locke', 734),('meera', 720),('randyll', 715),('ellaria', 709),('brynden', 703),('lancel', 693),('renly', 684),('ray', 677),('spice king', 676),('jeor mormont', 664),('grenn', 663),('jojen', 659),('edmure', 645),('petyr', 637),('alliser', 621),('ros', 617),('aemon', 615),('maester aemon', 612),('roose bolton', 602),('tycho', 600),('selyse', 592),('renly baratheon', 569),('lady crane', 568),('walder frey', 556),('lyanna', 556),('janos', 541),('doreah', 517),('doran', 515),('soldier', 512),('myranda', 508),('craster', 500),('dolorous edd', 495),('rickard karstark', 
456),('syrio forel', 454),('mace', 447),('septon', 441),('matthos', 418),('woman', 404),('lysa arryn', 398),('waif', 391),('kevan', 389),('guard', 388),('farmer hamlet', 354),('tanner', 354),('orell', 344),('khal moro', 343),('polliver', 342),('maester luwin', 341),('alton', 341),('benjen', 329),('mirri maz duur', 329),('jeor', 307),('anguy', 297),('olyvar', 297),('kinvara', 294),('edd', 293),('viserys', 288),('loras tyrell', 288),('rast', 284),('pyp', 283),('robin', 277),('myrcella', 271),('mero', 269),('mossador', 267),('salladhor', 262),('benjen stark', 258),('barristan selmy', 258),('robett', 254),('ser dontos', 253),('men', 252),('septa mordane', 251),('olly', 245),('saan', 241),('jory cassel', 237),('izembaro', 228),('fennesz', 224),('smalljon', 221),('malko', 220),('lord royce', 219),('styr', 207),('janos slynt', 206),('maester', 206),('old nan', 205),('tyene', 205),('drogon', 205),('sallador', 204),('bobono', 204),('irri', 199),('threeeyed raven', 194),('wolkan', 193),('karl tanner', 190),('obara', 189),('greatjon umber', 187),('black walder', 187),('lord mormont', 186),('radzal mo eraz', 185),('roz', 179),('illyrio', 177),('pyat pree', 173),('dothraki matron', 170),('meryn', 169),('bloodrider', 168),('lady anya', 165),('aeron', 161),('maester pycelle', 154),('dagmer', 153),('lord', 150),('rodrik', 150),('moles town whore', 150),('rakharo', 149),('ed', 149),('lothar', 148),('frey soldier', 147),('all', 143),('lord of bones', 142),('meryn trant', 140),('haylene', 139),('yohn royce', 138),('karsi', 137),('dickon', 137),('kraznys', 136),('yezzan', 136),('ser jorah', 135),('marillion', 134),('royce', 133),('cressen', 131),('black lorren', 128),('melessa', 125),('old man', 120),('gold cloak', 119),('camello', 119),('nymeria', 118),('loboda', 118),('leader', 118),('will', 117),('razdal', 117),('clarenzo', 111),('lord varys', 109),('male singer', 109),('winterfell shepherd', 108),('trystane', 108),('kevan lannister', 106),('dying man', 105),('lady olenna', 
105),('derryk', 104),('storyteller', 104),('wildling', 102),('qotho', 98),('wine merchant', 95),('violet', 94),('ralf', 94),('maggy', 93),('khal drogo', 90),('pyatt pree', 90),('drogo', 90),('harrag', 90),('rorge', 89),('mhaegen', 88),('dim dalba', 88),('young hodor', 87),('captain', 86),('rodrick cassel', 83),('rickon', 83),('kraznys mo nakloz', 83),('banker', 83),('eddark stark', 82),('rennick', 82),('lord karstark', 80),('talla', 80),('rider', 79),('slave owner', 77),('gatins', 75),('mountain', 74),('priestess', 72),('elaria', 72),('crowd', 71),('tobho mott', 70),('knight', 69),('khal', 69),('guard captain', 69),('quaith', 68),('glover', 68),('yohn', 68),('girl', 67),('morgan', 67),('red priest', 66),('alliser throne', 66),('maester wolkan', 65),('prostitute', 63),('prisoner', 63),('quorin', 63),('septa unella', 63),('quaithe', 62),('lollys stokeworth', 62),('manderly', 62),('announcer', 61),('all together', 61),('lady walda', 61),('amory', 59),('mordane', 58),('kovarro', 58),('prendahl', 58),('lancel lannister', 57),('robin arryn', 56),('vala', 54),('lollys', 53),('nights watchman', 52),('illyrio mopatis', 51),('steelshanks walton', 51),('wife', 51),('ladyc rane', 51),('blackfish', 50),('priest', 49),('archmaester', 49),('young ned', 48),('women', 47),('areo', 47),('black haired prostitute', 47),('lhazareen woman', 46),('pypar', 45),('pycell', 45),('aerson', 45),('red priestess', 45),('handmaiden', 44),('torturer', 44),('hizdahr zo loraq', 44),('jonos bracken', 43),('martyn', 43),('crayah', 43),('vardis egen', 42),('sam pyp and grenn', 42),('elder meereen slave', 42),('king joffrey', 41),('ser barristan', 41),('kings soldier', 41),('steward', 40),('hodor', 40),('drowned priest', 39),('child', 39),('shadow tower brother', 39),('arthur', 39),('reginald', 38),('harry', 38),('ser vardis', 37),('lommy greenhands', 37),('wounded soldier', 37),('tickler', 37),('frey guard', 37),('head', 37),('lem', 37),('dirah', 37),('mord', 36),('dornish lord', 36),('yarwyck', 
36),('alliser thorn', 36),('thin man', 36),('melara', 35),('owner', 35),('lommy', 34),('pig farmer', 34),('militant', 34),('kingsguard', 34),('teela', 34),('maid', 33),('blacksmith', 33),('morag', 33),('leaf', 33),('rickard', 32),('cassel', 31),('messenger', 31),('kings landing page', 31),('waymar royce', 30),('kings landing guard', 30),('protester', 30),('frey men', 30),('bianca', 30),('nora', 30),('gared', 29),('rattleshirt', 29),('pyelle', 29),('rickon stark', 27),('marei', 27),('robb dwarf', 27),('braavosi man', 27),('mycah', 26),('rodrik cassel', 26),('othell yarwyck', 26),('farlen', 26),('young lyanna', 26),('unsullied', 25),('meereen slave', 25),('maester pycell', 25),('hugh of vale', 24),('whore', 24),('walda', 24),('colen', 23),('edmure roslin', 23),('guymon', 23),('rhaegar', 23),('leo lefford', 21),('balon dwarf', 21),('harpy', 21),('daisy', 20),('musician', 20),('moles town madam', 20),('mace tyrell', 20),('assassin', 19),('frey man', 19),('donnel', 19),('bolton bannerman', 19),('young man', 19),('shagga', 18),('joffrey dwarf', 18),('sissy', 18),('daario naharis', 18),('bolton officer', 18),('othell yarwick', 18),('umber', 18),('martha', 18),('masha heddle', 17),('master', 17),('wildling elder', 17),('young benjen', 17),('child of forest', 17),('belicho', 17),('lyanna mormont', 17),('owen', 17),('end', 16),('jacks', 16),('allister', 16),('attendant', 16),('stable boy', 15),('portan', 15),('ahsa', 15),('brother', 15),('vale knight', 15),('ser alliser', 14),('tansy', 14),('renly dwarf', 14),('master of arms', 14),('brothers', 14),('robett glover', 14),('little bird', 13),('street urchin', 13),('gerard', 13),('axell florent', 13),('bolton guard', 13),('grand maester pyrcelle', 13),('ellia', 13),('steward of house stark', 12),('stark guard', 12),('tribesmen of vale', 12),('warg', 12),('ranger', 12),('slaver', 12),('buer', 12),('septa', 12),('shouting', 12),('vicky', 12),('lannister soldier', 11),('mhaegan', 11),('silk king', 11),('morgans friend', 
11),('manservant', 11),('lannister scout', 10),('kings landing baker', 10),('galbart glover', 10),('servant', 10),('greizhen', 10),('roslin', 10),('lord bolton', 10),('quick', 10),('strong', 10),('male voice', 10),('dothraki', 10),('bryndel', 10),('tell me something', 9),('kings guard', 9),('addam marbrand', 9),('group', 9),('janos slunt', 9),('slaves', 9),('client', 9),('listeners', 9),('brans voice', 9),('northman', 9),('everyone', 8),('stevron frey', 8),('stannis dwarf', 8),('ser vance', 8),('knight of house bracken', 7),('nights watch brother', 7),('rodrik cassal', 7),('populace', 7),('dolorous', 7),('rikon', 7),('pyattpree', 7),('cooper', 7),('bowen marsh', 7),('knight of house whent', 6),('nights watcher', 6),('several stark bannermen', 6),('mountian', 6),('everybody', 6),('daughter', 6),('hunters', 6),('mosador', 6),('bystanders', 6),('dothraki man', 6),('lyann', 6),('father', 6),('a voice', 5),('myrcella baratheon', 5),('rhakaro', 5),('deanerys targarian', 5),('ser rodrik', 5),('all three', 5),('timett', 5),('ironborn', 5),('innkeeper', 5),('white rat', 5),('buyer', 5),('brothel keeper', 5),('young rodrik', 5),('kevin', 5),('unsullied captain', 5),('barriston', 4),('unidentified nights watchers', 4),('cohollo', 4),('others at table', 4),('eddision', 4),('boy', 4),('willem', 4),('first mate', 4),('squire', 4),('merchant', 4),('sammy', 4),('wun wun', 4),('survivor', 4),('hooded figure', 4),('mistress', 4),('both', 4),('jonrobb', 3),('jhiqui', 3),('beric dondarrion', 3),('night watch stable boy', 3),('jaremy rykker', 3),('tomard', 3),('ryger rivers', 3),('armory', 3),('cuard', 3),('mar', 3),('tailor', 3),('ollys mother', 3),('lhara', 3),('together', 3),('nights watch', 3),('member', 3),('waitress', 3),('blonde prostitute', 3),('voice', 3),('ned alys', 3),('john royce', 3),('maryn trant', 2),('voices outside', 2),('stark bannermen', 2),('watchman', 2),('spice', 2),('quent', 2),('driver', 2),('merry', 2),('officer', 2),('dolrous edd', 2),('yarwick', 2),('thenn 
warg', 2),('sand snakes', 2),('karstark', 2),('waldery frey', 2),('archers', 2),('cold', 1),('title', 1),('main', 1),('karl', 1),('doloroud edd', 1),('slave buyer', 1),('giant', 1),('head prostitute', 1),('nights watchmen', 1),('dothraki woman', 1),('little sam', 1),('riverlands lord', 1),('dornish prince', 1),('ironborn lord', 1),('vale lord', 1)]
答案
# Reference answer: sort by speaker so each speaker's rows are adjacent,
# then keep a running word total that restarts whenever the speaker changes.
df_words = df_words.sort_values(by='Name')
L_count = []
N_words = list(zip(df_words['Name'], df_words['Words']))
last = None
for name, words in N_words:
    # Compare against the previous row's speaker rather than against the
    # value of the first tuple: a later row that happens to equal the first
    # (Name, Words) pair must not reset the running total.
    if L_count and name == last:
        L_count.append(L_count[-1] + words)
    else:
        L_count.append(words)
    last = name
df_words['Count'] = L_count
# The row holding the largest running total belongs to the top speaker.
df_words.loc[df_words['Count'].idxmax(), 'Name']
'tyrion lannister'
【练习二】现有一份关于科比的投篮数据集,请解决如下问题:
(a)哪种action_type和combined_shot_type的组合是最多的?
(b)在所有被记录的game_id中,遭遇到最多的opponent是哪一支?
# index_col promotes the named column ('shot_id') to the row index.
kobe_preview = pd.read_csv('data/Kobe_data.csv', index_col='shot_id')
kobe_preview.head()
|
action_type
|
combined_shot_type
|
game_event_id
|
game_id
|
lat
|
loc_x
|
loc_y
|
lon
|
minutes_remaining
|
period
|
...
|
shot_made_flag
|
shot_type
|
shot_zone_area
|
shot_zone_basic
|
shot_zone_range
|
team_id
|
team_name
|
game_date
|
matchup
|
opponent
|
shot_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1
|
Jump Shot
|
Jump Shot
|
10
|
20000012
|
33.9723
|
167
|
72
|
-118.1028
|
10
|
1
|
...
|
NaN
|
2PT Field Goal
|
Right Side(R)
|
Mid-Range
|
16-24 ft.
|
1610612747
|
Los Angeles Lakers
|
2000/10/31
|
LAL @ POR
|
POR
|
2
|
Jump Shot
|
Jump Shot
|
12
|
20000012
|
34.0443
|
-157
|
0
|
-118.4268
|
10
|
1
|
...
|
0.0
|
2PT Field Goal
|
Left Side(L)
|
Mid-Range
|
8-16 ft.
|
1610612747
|
Los Angeles Lakers
|
2000/10/31
|
LAL @ POR
|
POR
|
3
|
Jump Shot
|
Jump Shot
|
35
|
20000012
|
33.9093
|
-101
|
135
|
-118.3708
|
7
|
1
|
...
|
1.0
|
2PT Field Goal
|
Left Side Center(LC)
|
Mid-Range
|
16-24 ft.
|
1610612747
|
Los Angeles Lakers
|
2000/10/31
|
LAL @ POR
|
POR
|
4
|
Jump Shot
|
Jump Shot
|
43
|
20000012
|
33.8693
|
138
|
175
|
-118.1318
|
6
|
1
|
...
|
0.0
|
2PT Field Goal
|
Right Side Center(RC)
|
Mid-Range
|
16-24 ft.
|
1610612747
|
Los Angeles Lakers
|
2000/10/31
|
LAL @ POR
|
POR
|
5
|
Driving Dunk Shot
|
Dunk
|
155
|
20000012
|
34.0443
|
0
|
0
|
-118.2698
|
6
|
2
|
...
|
1.0
|
2PT Field Goal
|
Center(C)
|
Restricted Area
|
Less Than 8 ft.
|
1610612747
|
Los Angeles Lakers
|
2000/10/31
|
LAL @ POR
|
POR
|
5 rows × 24 columns
(a)
# Load the shot log with shot_id as the row index.
df = pd.read_csv('data/Kobe_data.csv', index_col='shot_id')
# Pair the two shot-type columns row by row, count each distinct pair, and
# read off the first label of the resulting counts.
shot_pairs = pd.Series(list(zip(df['action_type'], df['combined_shot_type'])))
pair_counts = shot_pairs.value_counts()
pair_counts.index[0]
('Jump Shot', 'Jump Shot')
(b)
每一个game_id对应一场比赛,
每场比赛只有一个对手,所以问题就是:按比赛场次统计,哪个对手遇到的次数最多
# Distinct game identifiers that appear in the shot log.
game_ids = df['game_id']
game_ids.unique()
array([20000012, 20000019, 20000047, ..., 49900086, 49900087, 49900088],dtype=int64)
# Collect the distinct opponents recorded under each game_id, keep the first
# one as that game's opponent, then count games per opponent and show the top.
opponents_by_game = df.groupby('game_id')['opponent'].unique()
game_opponent = opponents_by_game.apply(lambda teams: teams[0])
game_opponent.value_counts().nlargest(1)
# Alternative formulation, left disabled:
#pd.Series(list(zip(*pd.Series(list(zip(df['game_id'],df['opponent']))).unique().tolist()) )).value_counts().index[0]#nlargest(1)
SAS 91
Name: opponent, dtype: int64
# Same tally, but each game's array of opponents is converted to its string
# form before counting, so the labels look like "['SAS']".
opponent_arrays = df.groupby('game_id')['opponent'].unique()
opponent_labels = opponent_arrays.astype(str)
opponent_labels.value_counts().nlargest(1)
['SAS'] 91
Name: opponent, dtype: int64
# Row-wise (game_id, opponent) pairs.
list(zip(df['game_id'], df['opponent']))
# Deduplicate the pairs and count each remaining pair.
game_opponent_pairs = pd.Series(list(zip(df['game_id'], df['opponent'])))
distinct_pairs = pd.Series(game_opponent_pairs.unique())
distinct_pairs.value_counts()  # .tolist()
(29701127, GSW) 1
(20400915, CHA) 1
(20100440, DEN) 1
(21100169, CLE) 1
(20200372, PHI) 1
..
(20200842, SEA) 1
(20700326, SAS) 1
(29601055, DEN) 1
(49700071, UTA) 1
(21200888, ATL) 1
Length: 1559, dtype: int64
# Deduplicate the (game_id, opponent) pairs, transpose them into one tuple of
# game ids and one tuple of opponents, then count the opponents and show the
# most frequent one.
unique_pairs = pd.Series(list(zip(df['game_id'], df['opponent']))).unique().tolist()
transposed = list(zip(*unique_pairs))
opponents = list(transposed[1])
pd.Series(opponents).value_counts().nlargest(1)
SAS 91
dtype: int64
思维导图来自队友阿布
第1章 Pandas基础相关推荐
- pandas 第二章 pandas基础
第二章 pandas基础 import numpy as np import pandas as pd import xlrd 在开始学习前,请保证pandas的版本号不低于如下所示的版本,否则请务必 ...
- 第一章 Pandas基础
第一章 Pandas基础 导入pandas与numpy模块: import pandas as pandas import numpy as np 查看pandas的版本: print(pd.__ve ...
- joyful pandas第1章 Pandas基础
joyful pandas第1章 Pandas基础 Learning Objective 理论部分 一.文件读取与写入 1. 读取 (a)csv格式 (b)txt格式 (c)xls或xlsx格式 2. ...
- 第二章 pandas基础
文章目录 前言 一.文件的读取和写入 1.文件读取 2.数据写入 二.基本数据结构 1.Series 2.DataFrame 三.常用基本函数 1. 汇总函数 2. 特征统计函数 3. 唯一值函数 4 ...
- Pandas 基础知识
目录 第二章 pandas基础 一.文件的读取和写入 1. 文件读取 2. 数据写入 二.基本数据结构 1. Series 2. DataFrame 三.常用基本函数 1. 汇总函数 2. 特征统计函 ...
- Datawhale pandas学习任务二:pandas基础
第二章Pandas基础 import numpy as np import pandas as pd pd.__version__ '1.1.3' 一.文件读取和写入 1.文件读取 read_csv ...
- 快乐学习Pandas入门篇:Pandas基础
Datawhale学习 作者:杨煜,Datawhale成员 寄语:本文对Pandas基础内容进行了梳理,从文件读取与写入.Series及DataFrame基本数据结构.常用基本函数及排序四个模块快速入 ...
- 《利用python进行数据分析》读书笔记--第四章 numpy基础:数组和矢量计算
第四章 Numpy基础:数组和矢量计算 第一部分:numpy的ndarray:一种多维数组对象 实话说,用numpy的主要目的在于应用矢量化运算.Numpy并没有多么高级的数据分析功能,理解Numpy ...
- 数据载入、Pandas基础和探索性数据分析
1. 载入数据及初步观察 1.1 载入数据 数据集下载 https://www.kaggle.com/c/titanic/overview 1.1.1 导入numpy和pandas import nu ...
- Pandas入门篇:Pandas基础
Datawhale学习 作者:杨煜,Datawhale成员 寄语:本文对Pandas基础内容进行了梳理,从文件读取与写入.Series及DataFrame基本数据结构.常用基本函数及排序四个模块快速入 ...
最新文章
- Line上半年扭亏为盈 用户及营收遇瓶颈
- Design Pattern - Factory Method(C#)
- java基础知识之初识java
- URAL 1055 Combinations
- [BUUCTF-pwn]——ciscn_2019_s_3
- php二维数组的取值与转换
- 梁迪:源于热爱乐于分享,MVP代表圆桌会议
- 建议收藏,详细篇 PyCharm 完美教程
- php通过标识加锁,PHP通过加锁实现并发情况下抢码功能
- 抹掉所有内容和设置 macOS Monterey这个新功能太好用
- ofstream 向文件写数据
- pve安装黑群晖直通硬盘_更新教程:群晖下直接挂载WINDOWS的NTFS格式硬盘,试验通过......
- [转载] Python中的enumerate函数介绍
- Thor HTTP 抓包嗅探分析接口调试网络协议
- VC++ 6.0 中如何使用 CRT 调试功能来检测内存泄漏 调试方法
- 代码维护服务器,维护服务器的利器-pubwin 2009程序代码
- Excel VBA 操作键盘(如:移动方向键,上下左右等)
- 惊奇!?嗖拉拉新品“魔法咖啡”号称“持续高能、轻松享瘦”
- windows下开源免费waf防火墙,附可用资源包
- android fragment 白屏,当应用Crash后fragment出现白屏
热门文章
- matlab-高数 diff 二阶偏导数
- 三极管单级放大器输入输出阻抗
- Python教程:while 循环用法讲解
- cf两边黑屏怎么解决win10_win10玩红警卡顿黑屏,这样解决,有些老游戏也可以借鉴哦
- Mac中设置右键新建TXT文件
- 关于MUI一个很实用的前端框架
- Termux中proot-distro安装备份还原linux发行版笔记
- 浏览器 - 监听浏览器刷新及关闭
- 计算机管理工具怎么设置,电脑音频管理器怎么设置,教你电脑音频管理器怎么设置...
- 不用校园网如何下载论文文献|DOI号|文献免费下载