数据类型-DataFrame
- DataFrame是由多个Series数据列组成的表格数据类型,每行Series值都增加了一个共用的索引
- 既有行索引,又有列索引
- 行索引,表明不同行,横向索引,叫index,0轴,axis=0
- 列索引,表明不同列,纵向索引,叫columns,1轴,axis=1
- DataFrame数据类型可视为:二维 带标签 数组
- 每列值的类型可以不同
- 基本操作类似Series,依据行列索引操作
- 常用于表达二维数据,但也可以表达多维数据(DataFrame嵌套,极少用)
DataFrame数据类型创建
Python list列表 创建DataFrame
import pandas as pd
df = pd.DataFrame([True, 1, 2.3, 'a', '你好']) # 1维
df
|
0
|
0
|
True
|
1
|
1
|
2
|
2.3
|
3
|
a
|
4
|
你好
|
df = pd.DataFrame([[True,1,2.3,'a','你好'],[1,2,3,4,5]]) #2维
df
|
0
|
1
|
2
|
3
|
4
|
0
|
True
|
1
|
2.3
|
a
|
你好
|
1
|
1
|
2
|
3.0
|
4
|
5
|
# 3维,不建议
df = pd.DataFrame([[[True,1,2.3,'a','你好'],[1,2,3,4,5]],[[True,1,2.3,'a','你好'],[1,2,3,4,5]]])
df
|
0
|
1
|
0
|
[True, 1, 2.3, a, 你好]
|
[1, 2, 3, 4, 5]
|
1
|
[True, 1, 2.3, a, 你好]
|
[1, 2, 3, 4, 5]
|
Python 字典 创建DataFrame
df = pd.DataFrame({'one':[1,2,3,4],'two':[9,8,7,6]})
df
|
one
|
two
|
0
|
1
|
9
|
1
|
2
|
8
|
2
|
3
|
7
|
3
|
4
|
6
|
# 自定义行索引
df = pd.DataFrame({'one':[1,2,3,4],'two':[9,8,7,6]},index = ['a','b','c','d'])
df
|
one
|
two
|
a
|
1
|
9
|
b
|
2
|
8
|
c
|
3
|
7
|
d
|
4
|
6
|
df = pd.DataFrame({'A' : 1,'B' : 2.3,'C' : ['x','y',5] # 至少有一列需为列表(多行),标量值会自动广播到每一行
})
df
|
A
|
B
|
C
|
0
|
1
|
2.3
|
x
|
1
|
1
|
2.3
|
y
|
2
|
1
|
2.3
|
5
|
dt = {'one' : pd.Series([1,2,3],index=['a','b','c']),'two' : pd.Series([9,8,7,6],index=['a','b','c','d',])
}
dt
{'one': a    1
 b    2
 c    3
 dtype: int64, 'two': a    9
 b    8
 c    7
 d    6
 dtype: int64}
# one two自动列索引,abcd自动行索引.每个元素对应DataFrame的一列,每个元素内的键值对应一行
d = pd.DataFrame(dt)
d
|
one
|
two
|
a
|
1.0
|
9
|
b
|
2.0
|
8
|
c
|
3.0
|
7
|
d
|
NaN
|
6
|
# 数据根据行列索引自动补齐
d_2 = pd.DataFrame(dt,index=['b','c','d'],columns=['two','three'])
d_2
|
two
|
three
|
b
|
8
|
NaN
|
c
|
7
|
NaN
|
d
|
6
|
NaN
|
ndarray数组 创建DataFrame
import numpy as np
df = pd.DataFrame(np.arange(10).reshape(2,5)) # 自动生成行/列索引
df
|
0
|
1
|
2
|
3
|
4
|
0
|
0
|
1
|
2
|
3
|
4
|
1
|
5
|
6
|
7
|
8
|
9
|
# 自定义行列索引
df = pd.DataFrame(np.random.randn(6,4),index=[1,2,3,4,5,6],columns=['a','b','c','d'])
df
|
a
|
b
|
c
|
d
|
1
|
0.274340
|
0.296507
|
0.751198
|
0.763512
|
2
|
0.181134
|
0.675380
|
0.553695
|
0.632163
|
3
|
-0.059765
|
0.347702
|
1.138297
|
-0.143998
|
4
|
-1.370677
|
-0.951640
|
0.135964
|
-0.665875
|
5
|
1.490610
|
0.420539
|
0.628784
|
2.119896
|
6
|
-1.669737
|
1.167765
|
1.254722
|
-0.948624
|
Series 创建DataFrame
e = pd.DataFrame([pd.Series([1,2,3]),pd.Series([9,8,7,6])],index=['a','b'])
e
|
0
|
1
|
2
|
3
|
a
|
1.0
|
2.0
|
3.0
|
NaN
|
b
|
9.0
|
8.0
|
7.0
|
6.0
|
DataFrame属性
di = {'姓名':['张三','李四','王五','赵六'],'性别':['男','女','女','男'],'年龄':[12,22,32,42],'地址':['北京','上海','广州','深圳']
}
di
{'地址': ['北京', '上海', '广州', '深圳'],'姓名': ['张三', '李四', '王五', '赵六'],'年龄': [12, 22, 32, 42],'性别': ['男', '女', '女', '男']}
d = pd.DataFrame(di,index=['d1','d2','d3','d4'])
d
|
地址
|
姓名
|
年龄
|
性别
|
d1
|
北京
|
张三
|
12
|
男
|
d2
|
上海
|
李四
|
22
|
女
|
d3
|
广州
|
王五
|
32
|
女
|
d4
|
深圳
|
赵六
|
42
|
男
|
d.head() # 显示头部几行
|
地址
|
姓名
|
年龄
|
性别
|
d1
|
北京
|
张三
|
12
|
男
|
d2
|
上海
|
李四
|
22
|
女
|
d3
|
广州
|
王五
|
32
|
女
|
d4
|
深圳
|
赵六
|
42
|
男
|
d.tail(3) # 显示末尾几行
|
地址
|
姓名
|
年龄
|
性别
|
d2
|
上海
|
李四
|
22
|
女
|
d3
|
广州
|
王五
|
32
|
女
|
d4
|
深圳
|
赵六
|
42
|
男
|
d.info() # 相关信息概览
<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, d1 to d4
Data columns (total 4 columns):
地址 4 non-null object
姓名 4 non-null object
年龄 4 non-null int64
性别 4 non-null object
dtypes: int64(1), object(3)
memory usage: 160.0+ bytes
d.shape # 行数 列数
(4, 4)
d.dtypes # 列数据类型
地址 object
姓名 object
年龄 int64
性别 object
dtype: object
d.index # 获取行索引
Index(['d1', 'd2', 'd3', 'd4'], dtype='object')
d.columns # 获取列索引
Index(['地址', '姓名', '年龄', '性别'], dtype='object')
d.values # 获取值
array([['北京', '张三', 12, '男'],['上海', '李四', 22, '女'],['广州', '王五', 32, '女'],['深圳', '赵六', 42, '男']], dtype=object)
DataFrame查增改删
查 Read
类list/ndarray数据访问方式
dates = pd.date_range('20130101',periods=10)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04','2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08','2013-01-09', '2013-01-10'],dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(np.random.randn(10,4),index=dates,columns=['A','B','C','D'])
df
|
A
|
B
|
C
|
D
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
-0.442010
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
-0.952401
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
-0.113098
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
1.050036
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
0.940741
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
-0.549143
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
0.869293
|
#索引
df['A']
2013-01-01 0.754077
2013-01-02 0.103394
2013-01-03 0.174730
2013-01-04 -0.950517
2013-01-05 0.076178
2013-01-06 1.371702
2013-01-07 0.126720
2013-01-08 -1.246918
2013-01-09 0.941099
2013-01-10 1.951555
Freq: D, Name: A, dtype: float64
df.A
2013-01-01 0.754077
2013-01-02 0.103394
2013-01-03 0.174730
2013-01-04 -0.950517
2013-01-05 0.076178
2013-01-06 1.371702
2013-01-07 0.126720
2013-01-08 -1.246918
2013-01-09 0.941099
2013-01-10 1.951555
Freq: D, Name: A, dtype: float64
df['A']['2013-01-01'] # 先列后行
0.75407705661157032
df.A['2013-01-01']
0.75407705661157032
df[['A','C']]
|
A
|
C
|
2013-01-01
|
0.754077
|
-0.557050
|
2013-01-02
|
0.103394
|
-0.413054
|
2013-01-03
|
0.174730
|
1.781379
|
2013-01-04
|
-0.950517
|
-0.097138
|
2013-01-05
|
0.076178
|
1.142290
|
2013-01-06
|
1.371702
|
-1.470106
|
2013-01-07
|
0.126720
|
-2.212507
|
2013-01-08
|
-1.246918
|
1.761499
|
2013-01-09
|
0.941099
|
1.927863
|
2013-01-10
|
1.951555
|
-0.171690
|
Pandas专用的数据访问方式 — .loc
通过标签(自定义的行/列索引)获取数据
#选取某行
df.loc['2013-01-01']
A 0.754077
B -0.346202
C -0.557050
D 0.778106
Name: 2013-01-01 00:00:00, dtype: float64
#选取某列
df.loc[:,'A']
2013-01-01 0.754077
2013-01-02 0.103394
2013-01-03 0.174730
2013-01-04 -0.950517
2013-01-05 0.076178
2013-01-06 1.371702
2013-01-07 0.126720
2013-01-08 -1.246918
2013-01-09 0.941099
2013-01-10 1.951555
Freq: D, Name: A, dtype: float64
# 选取特定值
df.loc['2013-01-01','A'] # 先行后列
0.75407705661157032
# 选取指定的行/列
df.loc[[dates[0],dates[2]],:] # 指定行
|
A
|
B
|
C
|
D
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
df.loc[:,['A','B']] # 指定列
|
A
|
B
|
2013-01-01
|
0.754077
|
-0.346202
|
2013-01-02
|
0.103394
|
-1.051044
|
2013-01-03
|
0.174730
|
2.056007
|
2013-01-04
|
-0.950517
|
-0.226887
|
2013-01-05
|
0.076178
|
-0.518970
|
2013-01-06
|
1.371702
|
-1.028873
|
2013-01-07
|
0.126720
|
-0.251519
|
2013-01-08
|
-1.246918
|
1.530266
|
2013-01-09
|
0.941099
|
-2.420932
|
2013-01-10
|
1.951555
|
-0.264012
|
df.loc[[dates[0],dates[2]],['A','B']] # 指定行列
|
A
|
B
|
2013-01-01
|
0.754077
|
-0.346202
|
2013-01-03
|
0.174730
|
2.056007
|
# 切片
df.loc['2013-01-01':'2013-01-04',:] # 对行切片
|
A
|
B
|
C
|
D
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
-0.442010
|
df.loc[:,'A':'C'] # 对列切片
|
A
|
B
|
C
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
# 切片选取连续区块。行,列。.loc按标签切片,首尾两端都包含(左闭右闭)
df.loc['2013-01-01':'2013-01-04','A':'C']
|
A
|
B
|
C
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
.iloc 通过整数位置(从0开始的默认索引)获取数据
# 选取某行
df.iloc[3]
A -0.950517
B -0.226887
C -0.097138
D -0.442010
Name: 2013-01-04 00:00:00, dtype: float64
# 选取某列
df.iloc[:,2]
2013-01-01 -0.557050
2013-01-02 -0.413054
2013-01-03 1.781379
2013-01-04 -0.097138
2013-01-05 1.142290
2013-01-06 -1.470106
2013-01-07 -2.212507
2013-01-08 1.761499
2013-01-09 1.927863
2013-01-10 -0.171690
Freq: D, Name: C, dtype: float64
# 选取特定值:
df.iloc[1,2]
-0.41305425875508139
# 选取指定的行/列
df.iloc[[1,2,4],:] # 指定行
|
A
|
B
|
C
|
D
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
-0.952401
|
df.iloc[:,[0,2]] # 指定列
|
A
|
C
|
2013-01-01
|
0.754077
|
-0.557050
|
2013-01-02
|
0.103394
|
-0.413054
|
2013-01-03
|
0.174730
|
1.781379
|
2013-01-04
|
-0.950517
|
-0.097138
|
2013-01-05
|
0.076178
|
1.142290
|
2013-01-06
|
1.371702
|
-1.470106
|
2013-01-07
|
0.126720
|
-2.212507
|
2013-01-08
|
-1.246918
|
1.761499
|
2013-01-09
|
0.941099
|
1.927863
|
2013-01-10
|
1.951555
|
-0.171690
|
df.iloc[[1,2,4],[0,2]] # 指定行列 ,先行后列
|
A
|
C
|
2013-01-02
|
0.103394
|
-0.413054
|
2013-01-03
|
0.174730
|
1.781379
|
2013-01-05
|
0.076178
|
1.142290
|
# 切片
df.iloc[1:3,:] # 对行切片:
|
A
|
B
|
C
|
D
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
df.iloc[:,1:3] # 对列切片:
|
B
|
C
|
2013-01-01
|
-0.346202
|
-0.557050
|
2013-01-02
|
-1.051044
|
-0.413054
|
2013-01-03
|
2.056007
|
1.781379
|
2013-01-04
|
-0.226887
|
-0.097138
|
2013-01-05
|
-0.518970
|
1.142290
|
2013-01-06
|
-1.028873
|
-1.470106
|
2013-01-07
|
-0.251519
|
-2.212507
|
2013-01-08
|
1.530266
|
1.761499
|
2013-01-09
|
-2.420932
|
1.927863
|
2013-01-10
|
-0.264012
|
-0.171690
|
df.iloc[3:5,0:2] # 切片选取连续区块。行,列。左闭右开(包含起始位置,不包含结束位置)
|
A
|
B
|
2013-01-04
|
-0.950517
|
-0.226887
|
2013-01-05
|
0.076178
|
-0.518970
|
Boolean索引
# 通过某列选择数据:
df[df.A > 0]
|
A
|
B
|
C
|
D
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
-0.952401
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
-0.113098
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
1.050036
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
-0.549143
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
0.869293
|
# 通过where选择数据:
b = df[df > 0]
b
|
A
|
B
|
C
|
D
|
2013-01-01
|
0.754077
|
NaN
|
NaN
|
0.778106
|
2013-01-02
|
0.103394
|
NaN
|
NaN
|
0.268955
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
2013-01-04
|
NaN
|
NaN
|
NaN
|
NaN
|
2013-01-05
|
0.076178
|
NaN
|
1.142290
|
NaN
|
2013-01-06
|
1.371702
|
NaN
|
NaN
|
NaN
|
2013-01-07
|
0.126720
|
NaN
|
NaN
|
1.050036
|
2013-01-08
|
NaN
|
1.530266
|
1.761499
|
0.940741
|
2013-01-09
|
0.941099
|
NaN
|
1.927863
|
NaN
|
2013-01-10
|
1.951555
|
NaN
|
NaN
|
0.869293
|
type(b['A']['2013-01-01'])
numpy.float64
# 通过 isin() 过滤数据:
df2 = df.copy()
df2['E'] = ['one', 'one','two','three','four','three','five','four','three','five']
df2
|
A
|
B
|
C
|
D
|
E
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
one
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
one
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
two
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
-0.442010
|
three
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
-0.952401
|
four
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
-0.113098
|
three
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
1.050036
|
five
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
0.940741
|
four
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
-0.549143
|
three
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
0.869293
|
five
|
df2['E'].isin(['one','four'])
2013-01-01 True
2013-01-02 True
2013-01-03 False
2013-01-04 False
2013-01-05 True
2013-01-06 False
2013-01-07 False
2013-01-08 True
2013-01-09 False
2013-01-10 False
Freq: D, Name: E, dtype: bool
df2[df2['E'].isin(['one','four'])]
|
A
|
B
|
C
|
D
|
E
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
one
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
one
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
-0.952401
|
four
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
0.940741
|
four
|
增 Create
s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
s1
2013-01-02 1
2013-01-03 2
2013-01-04 3
2013-01-05 4
2013-01-06 5
2013-01-07 6
Freq: D, dtype: int64
# 新增一列数据
df2['F'] = s1
df2
|
A
|
B
|
C
|
D
|
E
|
F
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
0.778106
|
one
|
NaN
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
0.268955
|
one
|
1.0
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
1.643397
|
two
|
2.0
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
-0.442010
|
three
|
3.0
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
-0.952401
|
four
|
4.0
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
-0.113098
|
three
|
5.0
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
1.050036
|
five
|
6.0
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
0.940741
|
four
|
NaN
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
-0.549143
|
three
|
NaN
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
0.869293
|
five
|
NaN
|
改 Update
# 更新一列值
df2.loc[:,'D']
2013-01-01 0.778106
2013-01-02 0.268955
2013-01-03 1.643397
2013-01-04 -0.442010
2013-01-05 -0.952401
2013-01-06 -0.113098
2013-01-07 1.050036
2013-01-08 0.940741
2013-01-09 -0.549143
2013-01-10 0.869293
Freq: D, Name: D, dtype: float64
df2.loc[:,'D'] = 5
df2
|
A
|
B
|
C
|
D
|
E
|
F
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
5
|
one
|
NaN
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
5
|
one
|
1.0
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
5
|
two
|
2.0
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
5
|
three
|
3.0
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
5
|
four
|
4.0
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
5
|
three
|
5.0
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
5
|
five
|
6.0
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
5
|
four
|
NaN
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
5
|
three
|
NaN
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
5
|
five
|
NaN
|
df2.iloc[1,3]
5
df2.iloc[1,3] = 10.1
df2
|
A
|
B
|
C
|
D
|
E
|
F
|
2013-01-01
|
0.754077
|
-0.346202
|
-0.557050
|
5.0
|
one
|
NaN
|
2013-01-02
|
0.103394
|
-1.051044
|
-0.413054
|
10.1
|
one
|
1.0
|
2013-01-03
|
0.174730
|
2.056007
|
1.781379
|
5.0
|
two
|
2.0
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
5.0
|
three
|
3.0
|
2013-01-05
|
0.076178
|
-0.518970
|
1.142290
|
5.0
|
four
|
4.0
|
2013-01-06
|
1.371702
|
-1.028873
|
-1.470106
|
5.0
|
three
|
5.0
|
2013-01-07
|
0.126720
|
-0.251519
|
-2.212507
|
5.0
|
five
|
6.0
|
2013-01-08
|
-1.246918
|
1.530266
|
1.761499
|
5.0
|
four
|
NaN
|
2013-01-09
|
0.941099
|
-2.420932
|
1.927863
|
5.0
|
three
|
NaN
|
2013-01-10
|
1.951555
|
-0.264012
|
-0.171690
|
5.0
|
five
|
NaN
|
# 通过where更新
df3 = df.copy()
df3[df3 > 0] = -df3
df3
|
A
|
B
|
C
|
D
|
2013-01-01
|
-0.754077
|
-0.346202
|
-0.557050
|
-0.778106
|
2013-01-02
|
-0.103394
|
-1.051044
|
-0.413054
|
-0.268955
|
2013-01-03
|
-0.174730
|
-2.056007
|
-1.781379
|
-1.643397
|
2013-01-04
|
-0.950517
|
-0.226887
|
-0.097138
|
-0.442010
|
2013-01-05
|
-0.076178
|
-0.518970
|
-1.142290
|
-0.952401
|
2013-01-06
|
-1.371702
|
-1.028873
|
-1.470106
|
-0.113098
|
2013-01-07
|
-0.126720
|
-0.251519
|
-2.212507
|
-1.050036
|
2013-01-08
|
-1.246918
|
-1.530266
|
-1.761499
|
-0.940741
|
2013-01-09
|
-0.941099
|
-2.420932
|
-1.927863
|
-0.549143
|
2013-01-10
|
-1.951555
|
-0.264012
|
-0.171690
|
-0.869293
|