1. 基础:数据导入,读取,索引
1.1 数据导入,读取
import pandas as pa#导入库
titan=pa.read_csv('./titanic_train911.csv')#读取文件
titan.head(5)#查看前5行
|
PassengerId
|
Survived
|
Pclass
|
Name
|
Sex
|
Age
|
SibSp
|
Parch
|
Ticket
|
Fare
|
Cabin
|
Embarked
|
0
|
1
|
0
|
3
|
Braund, Mr. Owen Harris
|
male
|
22.0
|
1
|
0
|
A/5 21171
|
7.2500
|
NaN
|
S
|
1
|
2
|
1
|
1
|
Cumings, Mrs. John Bradley (Florence Briggs Th...
|
female
|
38.0
|
1
|
0
|
PC 17599
|
71.2833
|
C85
|
C
|
2
|
3
|
1
|
3
|
Heikkinen, Miss. Laina
|
female
|
26.0
|
0
|
0
|
STON/O2. 3101282
|
7.9250
|
NaN
|
S
|
3
|
4
|
1
|
1
|
Futrelle, Mrs. Jacques Heath (Lily May Peel)
|
female
|
35.0
|
1
|
0
|
113803
|
53.1000
|
C123
|
S
|
4
|
5
|
0
|
3
|
Allen, Mr. William Henry
|
male
|
35.0
|
0
|
0
|
373450
|
8.0500
|
NaN
|
S
|
titan.info()#信息
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId 891 non-null int64
Survived 891 non-null int64
Pclass 891 non-null int64
Name 891 non-null object
Sex 891 non-null object
Age 714 non-null float64
SibSp 891 non-null int64
Parch 891 non-null int64
Ticket 891 non-null object
Fare 891 non-null float64
Cabin 204 non-null object
Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
titan.columns#查看表格的列名
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp','Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],dtype='object')
titan.dtypes#查看数据类型
PassengerId int64
Survived int64
Pclass int64
Name object
Sex object
Age float64
SibSp int64
Parch int64
Ticket object
Fare float64
Cabin object
Embarked object
dtype: object
titan.values#查看数据
array([[1, 0, 3, ..., 7.25, nan, 'S'],[2, 1, 1, ..., 71.2833, 'C85', 'C'],[3, 1, 3, ..., 7.925, nan, 'S'],...,[889, 0, 3, ..., 23.45, nan, 'S'],[890, 1, 1, ..., 30.0, 'C148', 'C'],[891, 0, 3, ..., 7.75, nan, 'Q']], dtype=object)
titan.index#查看索引
RangeIndex(start=0, stop=891, step=1)
titan=titan.set_index('PassengerId')#自己定义索引(可取列名)
titan.head(8)
|
Survived
|
Pclass
|
Age
|
SibSp
|
Parch
|
Ticket
|
Cabin
|
Embarked
|
PassengerId
|
|
|
|
|
|
|
|
|
1
|
0
|
3
|
22.0
|
1
|
0
|
A/5 21171
|
NaN
|
S
|
2
|
1
|
1
|
38.0
|
1
|
0
|
PC 17599
|
C85
|
C
|
3
|
1
|
3
|
26.0
|
0
|
0
|
STON/O2. 3101282
|
NaN
|
S
|
4
|
1
|
1
|
35.0
|
1
|
0
|
113803
|
C123
|
S
|
5
|
0
|
3
|
35.0
|
0
|
0
|
373450
|
NaN
|
S
|
6
|
0
|
3
|
NaN
|
0
|
0
|
330877
|
NaN
|
Q
|
7
|
0
|
1
|
54.0
|
0
|
0
|
17463
|
E46
|
S
|
8
|
0
|
3
|
2.0
|
3
|
1
|
349909
|
NaN
|
S
|
titan.describe()#查看基本统计特性
|
Survived
|
Pclass
|
Age
|
SibSp
|
Parch
|
count
|
891.000000
|
891.000000
|
714.000000
|
891.000000
|
891.000000
|
mean
|
0.383838
|
2.308642
|
29.699118
|
0.523008
|
0.381594
|
std
|
0.486592
|
0.836071
|
14.526497
|
1.102743
|
0.806057
|
min
|
0.000000
|
1.000000
|
0.420000
|
0.000000
|
0.000000
|
25%
|
0.000000
|
2.000000
|
20.125000
|
0.000000
|
0.000000
|
50%
|
0.000000
|
3.000000
|
28.000000
|
0.000000
|
0.000000
|
75%
|
1.000000
|
3.000000
|
38.000000
|
1.000000
|
0.000000
|
max
|
1.000000
|
3.000000
|
80.000000
|
8.000000
|
6.000000
|
1.2 索引
titan['Survived'][:8]#通过索引查看0-8行的Survived 属性
PassengerId
1 0
2 1
3 1
4 1
5 0
6 0
7 0
8 0
Name: Survived, dtype: int64
findlist=['Survived','Age']
titan[findlist][:8]#将要查询的属性列名保存到list中传入进行查看
|
Survived
|
Age
|
PassengerId
|
|
|
1
|
0
|
22.0
|
2
|
1
|
38.0
|
3
|
1
|
26.0
|
4
|
1
|
35.0
|
5
|
0
|
35.0
|
6
|
0
|
NaN
|
7
|
0
|
54.0
|
8
|
0
|
2.0
|
titan=titan.set_index('Age')#先指定索引
titan.loc[38]#loc方法定位
|
Survived
|
Pclass
|
SibSp
|
Parch
|
Ticket
|
Cabin
|
Embarked
|
Age
|
|
|
|
|
|
|
|
38.0
|
1
|
1
|
1
|
0
|
PC 17599
|
C85
|
C
|
38.0
|
1
|
3
|
1
|
5
|
347077
|
NaN
|
S
|
38.0
|
1
|
1
|
0
|
0
|
113572
|
B28
|
NaN
|
38.0
|
0
|
3
|
0
|
0
|
349249
|
NaN
|
S
|
38.0
|
1
|
1
|
1
|
0
|
19943
|
C93
|
S
|
38.0
|
0
|
1
|
0
|
1
|
PC 17582
|
C91
|
S
|
38.0
|
0
|
2
|
0
|
0
|
237671
|
NaN
|
S
|
38.0
|
0
|
3
|
0
|
0
|
SOTON/O.Q. 3101306
|
NaN
|
S
|
38.0
|
0
|
3
|
0
|
0
|
315089
|
NaN
|
S
|
38.0
|
1
|
1
|
0
|
0
|
PC 17757
|
C45
|
C
|
38.0
|
0
|
1
|
0
|
0
|
19972
|
NaN
|
S
|
titan.iloc[666]#iloc方法查找定位
Survived 0
Pclass 2
SibSp 0
Parch 0
Ticket 234686
Cabin NaN
Embarked S
Name: 25.0, dtype: object
titan.iloc[666:668,2:5]
|
SibSp
|
Parch
|
Ticket
|
Age
|
|
|
|
25.0
|
0
|
0
|
234686
|
NaN
|
0
|
0
|
312993
|
titan=pa.read_csv('./titanic_train911.csv')#读取文件
titan.head()
|
PassengerId
|
Survived
|
Pclass
|
Name
|
Sex
|
Age
|
SibSp
|
Parch
|
Ticket
|
Fare
|
Cabin
|
Embarked
|
0
|
1
|
0
|
3
|
Braund, Mr. Owen Harris
|
male
|
22.0
|
1
|
0
|
A/5 21171
|
7.2500
|
NaN
|
S
|
1
|
2
|
1
|
1
|
Cumings, Mrs. John Bradley (Florence Briggs Th...
|
female
|
38.0
|
1
|
0
|
PC 17599
|
71.2833
|
C85
|
C
|
2
|
3
|
1
|
3
|
Heikkinen, Miss. Laina
|
female
|
26.0
|
0
|
0
|
STON/O2. 3101282
|
7.9250
|
NaN
|
S
|
3
|
4
|
1
|
1
|
Futrelle, Mrs. Jacques Heath (Lily May Peel)
|
female
|
35.0
|
1
|
0
|
113803
|
53.1000
|
C123
|
S
|
4
|
5
|
0
|
3
|
Allen, Mr. William Henry
|
male
|
35.0
|
0
|
0
|
373450
|
8.0500
|
NaN
|
S
|
titan=titan.set_index('Name')
titan.head()
|
PassengerId
|
Survived
|
Pclass
|
Sex
|
Age
|
SibSp
|
Parch
|
Ticket
|
Fare
|
Cabin
|
Embarked
|
Name
|
|
|
|
|
|
|
|
|
|
|
|
Braund, Mr. Owen Harris
|
1
|
0
|
3
|
male
|
22.0
|
1
|
0
|
A/5 21171
|
7.2500
|
NaN
|
S
|
Cumings, Mrs. John Bradley (Florence Briggs Thayer)
|
2
|
1
|
1
|
female
|
38.0
|
1
|
0
|
PC 17599
|
71.2833
|
C85
|
C
|
Heikkinen, Miss. Laina
|
3
|
1
|
3
|
female
|
26.0
|
0
|
0
|
STON/O2. 3101282
|
7.9250
|
NaN
|
S
|
Futrelle, Mrs. Jacques Heath (Lily May Peel)
|
4
|
1
|
1
|
female
|
35.0
|
1
|
0
|
113803
|
53.1000
|
C123
|
S
|
Allen, Mr. William Henry
|
5
|
0
|
3
|
male
|
35.0
|
0
|
0
|
373450
|
8.0500
|
NaN
|
S
|
titan[titan['Age']>18][2:8]#bool查询
|
PassengerId
|
Survived
|
Pclass
|
Sex
|
Age
|
SibSp
|
Parch
|
Ticket
|
Fare
|
Cabin
|
Embarked
|
Name
|
|
|
|
|
|
|
|
|
|
|
|
Heikkinen, Miss. Laina
|
3
|
1
|
3
|
female
|
26.0
|
0
|
0
|
STON/O2. 3101282
|
7.9250
|
NaN
|
S
|
Futrelle, Mrs. Jacques Heath (Lily May Peel)
|
4
|
1
|
1
|
female
|
35.0
|
1
|
0
|
113803
|
53.1000
|
C123
|
S
|
Allen, Mr. William Henry
|
5
|
0
|
3
|
male
|
35.0
|
0
|
0
|
373450
|
8.0500
|
NaN
|
S
|
McCarthy, Mr. Timothy J
|
7
|
0
|
1
|
male
|
54.0
|
0
|
0
|
17463
|
51.8625
|
E46
|
S
|
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
|
9
|
1
|
3
|
female
|
27.0
|
0
|
2
|
347742
|
11.1333
|
NaN
|
S
|
Bonnell, Miss. Elizabeth
|
12
|
1
|
1
|
female
|
58.0
|
0
|
0
|
113783
|
26.5500
|
C103
|
S
|
titan[titan['Age']==18][:8]
|
PassengerId
|
Survived
|
Pclass
|
Sex
|
Age
|
SibSp
|
Parch
|
Ticket
|
Fare
|
Cabin
|
Embarked
|
Name
|
|
|
|
|
|
|
|
|
|
|
|
Vander Planke, Miss. Augusta Maria
|
39
|
0
|
3
|
female
|
18.0
|
2
|
0
|
345764
|
18.0000
|
NaN
|
S
|
Arnold-Franchi, Mrs. Josef (Josefine Franchi)
|
50
|
0
|
3
|
female
|
18.0
|
1
|
0
|
349237
|
17.8000
|
NaN
|
S
|
Andrew, Mr. Edgardo Samuel
|
145
|
0
|
2
|
male
|
18.0
|
0
|
0
|
231945
|
11.5000
|
NaN
|
S
|
Klasen, Mr. Klas Albin
|
176
|
0
|
3
|
male
|
18.0
|
1
|
1
|
350404
|
7.8542
|
NaN
|
S
|
Cohen, Mr. Gurshon "Gus"
|
205
|
1
|
3
|
male
|
18.0
|
0
|
0
|
A/5 3540
|
8.0500
|
NaN
|
S
|
Fahlstrom, Mr. Arne Jonas
|
229
|
0
|
2
|
male
|
18.0
|
0
|
0
|
236171
|
13.0000
|
NaN
|
S
|
Ryerson, Miss. Emily Borie
|
312
|
1
|
1
|
female
|
18.0
|
2
|
2
|
PC 17608
|
262.3750
|
B57 B59 B63 B66
|
C
|
Wiklund, Mr. Jakob Alfred
|
372
|
0
|
3
|
male
|
18.0
|
1
|
0
|
3101267
|
6.4958
|
NaN
|
S
|
1.3 排序
titan.groupby('Sex').sum()#按照年龄分组后统计各组的sum和
|
PassengerId
|
Survived
|
Pclass
|
Age
|
SibSp
|
Parch
|
Fare
|
Sex
|
|
|
|
|
|
|
|
female
|
135343
|
233
|
678
|
7286.00
|
218
|
204
|
13966.6628
|
male
|
262043
|
109
|
1379
|
13919.17
|
248
|
136
|
14727.2865
|
titan.groupby('Sex')['Age'].mean()#按照性别分组后求各组中年龄的均值
Sex
female 27.915709
male 30.726645
Name: Age, dtype: float64
2 统计计算
mac=pa.read_csv('macrodata.csv')
mac.head()
|
year
|
quarter
|
realgdp
|
realcons
|
realinv
|
realgovt
|
realdpi
|
cpi
|
m1
|
tbilrate
|
unemp
|
pop
|
infl
|
realint
|
0
|
1959.0
|
1.0
|
2710.349
|
1707.4
|
286.898
|
470.045
|
1886.9
|
28.98
|
139.7
|
2.82
|
5.8
|
177.146
|
0.00
|
0.00
|
1
|
1959.0
|
2.0
|
2778.801
|
1733.7
|
310.859
|
481.301
|
1919.7
|
29.15
|
141.7
|
3.08
|
5.1
|
177.830
|
2.34
|
0.74
|
2
|
1959.0
|
3.0
|
2775.488
|
1751.8
|
289.226
|
491.260
|
1916.4
|
29.35
|
140.5
|
3.82
|
5.3
|
178.657
|
2.74
|
1.09
|
3
|
1959.0
|
4.0
|
2785.204
|
1753.7
|
299.356
|
484.052
|
1931.3
|
29.37
|
140.0
|
4.33
|
5.6
|
179.386
|
0.27
|
4.06
|
4
|
1960.0
|
1.0
|
2847.699
|
1770.5
|
331.722
|
462.199
|
1955.5
|
29.54
|
139.6
|
3.50
|
5.2
|
180.007
|
2.31
|
1.19
|
mac['realinv'].sum()#
205611.364
mac.sum(axis=0)#按列求和
year 402727.000
quarter 506.000
realgdp 1465897.896
realcons 979534.500
realinv 205611.364
realgovt 134655.714
realdpi 1078039.800
cpi 21330.385
m1 135589.300
tbilrate 1078.290
unemp 1194.600
pop 48664.003
infl 804.150
realint 271.310
dtype: float64
mac.mean(axis=1)#按行求均值
0 669.717000
1 681.807214
2 681.973643
3 684.259143
4 692.140500
5 691.066643
6 692.083643
7 687.265786
8 690.154643
9 699.882857
10 708.679857
11 719.592571
12 728.865429
13 734.630857
14 740.307857
15 743.272714
16 748.771071
17 754.663357
18 765.761786
19 770.859429
20 783.005000
21 792.656357
22 801.510857
23 804.556643
24 818.246500
25 825.562357
26 841.191571
27 857.239786
28 871.669929
29 873.893786...
173 2461.527500
174 2469.052643
175 2475.373643
176 2485.468214
177 2513.350214
178 2550.004357
179 2570.842071
180 2588.743786
181 2611.854143
182 2632.526786
183 2659.255071
184 2667.557429
185 2680.025786
186 2699.507571
187 2714.498857
188 2750.110786
189 2761.561000
190 2766.796214
191 2785.227357
192 2794.254500
193 2807.221143
194 2824.258929
195 2828.893143
196 2820.595929
197 2840.309786
198 2816.360214
199 2802.594286
200 2767.597071
201 2774.576071
202 2788.397500
Length: 203, dtype: float64
#mac.sum(axis ='columns')
mac.sum(axis=1)#按列求和
0 9376.038
1 9545.301
2 9547.631
3 9579.628
4 9689.967
5 9674.933
6 9689.171
7 9621.721
8 9662.165
9 9798.360
10 9921.518
11 10074.296
12 10204.116
13 10284.832
14 10364.310
15 10405.818
16 10482.795
17 10565.287
18 10720.665
19 10792.032
20 10962.070
21 11097.189
22 11221.152
23 11263.793
24 11455.451
25 11557.873
26 11776.682
27 12001.357
28 12203.379
29 12234.513...
173 34461.385
174 34566.737
175 34655.231
176 34796.555
177 35186.903
178 35700.061
179 35991.789
180 36242.413
181 36565.958
182 36855.375
183 37229.571
184 37345.804
185 37520.361
186 37793.106
187 38002.984
188 38501.551
189 38661.854
190 38735.147
191 38993.183
192 39119.563
193 39301.096
194 39539.625
195 39604.504
196 39488.343
197 39764.337
198 39429.043
199 39236.320
200 38746.359
201 38844.065
202 39037.565
Length: 203, dtype: float64
mac.cov()#协方差
|
year
|
quarter
|
realgdp
|
realcons
|
realinv
|
realgovt
|
realdpi
|
cpi
|
m1
|
tbilrate
|
unemp
|
pop
|
infl
|
realint
|
year
|
215.702580
|
-0.186558
|
4.647762e+04
|
3.329587e+04
|
8.096633e+03
|
1782.201530
|
3.510097e+04
|
889.385028
|
6.510849e+03
|
-7.164087
|
0.914942
|
547.621880
|
-6.729093
|
-0.093007
|
quarter
|
-0.186558
|
1.251183
|
2.187058e+01
|
1.421728e+01
|
4.628355e+00
|
1.913510
|
1.622233e+01
|
0.345575
|
2.667532e+00
|
0.035261
|
-0.006797
|
0.348573
|
-0.008926
|
0.065246
|
realgdp
|
46477.622176
|
21.870583
|
1.033594e+07
|
7.431573e+06
|
1.839145e+06
|
393386.359633
|
7.783769e+06
|
194353.924345
|
1.432731e+06
|
-2484.948926
|
-301.503958
|
119402.877291
|
-2006.862002
|
-416.061514
|
realcons
|
33295.865483
|
14.217276
|
7.431573e+06
|
5.351571e+06
|
1.321452e+06
|
283735.957690
|
5.601336e+06
|
139453.560568
|
1.029089e+06
|
-1906.458928
|
-189.504759
|
85662.398005
|
-1496.161105
|
-367.448608
|
realinv
|
8096.633424
|
4.628355
|
1.839145e+06
|
1.321452e+06
|
3.423447e+05
|
65506.893003
|
1.377683e+06
|
33957.136254
|
2.486486e+05
|
-448.762616
|
-158.177573
|
20952.267936
|
-366.256662
|
-72.505352
|
realgovt
|
1782.201530
|
1.913510
|
3.933864e+05
|
2.837360e+05
|
6.550689e+04
|
19842.569221
|
2.977704e+05
|
7619.553691
|
5.711414e+04
|
-119.590326
|
-5.058225
|
4539.121687
|
-135.480431
|
18.526286
|
realdpi
|
35100.969912
|
16.222333
|
7.783769e+06
|
5.601336e+06
|
1.377683e+06
|
297770.416433
|
5.873430e+06
|
146538.249776
|
1.078988e+06
|
-1834.742182
|
-108.863828
|
90094.189290
|
-1471.848862
|
-315.850267
|
cpi
|
889.385028
|
0.345575
|
1.943539e+05
|
1.394536e+05
|
3.395714e+04
|
7619.553691
|
1.465382e+05
|
3755.100856
|
2.759093e+04
|
-40.682744
|
2.210069
|
2268.750456
|
-44.328471
|
4.689722
|
m1
|
6510.849454
|
2.667532
|
1.432731e+06
|
1.029089e+06
|
2.486486e+05
|
57114.142869
|
1.078988e+06
|
27590.930981
|
2.073403e+05
|
-421.744940
|
-10.890270
|
16668.489012
|
-395.866750
|
-18.638259
|
tbilrate
|
-7.164087
|
0.035261
|
-2.484949e+03
|
-1.906459e+03
|
-4.487626e+02
|
-119.590326
|
-1.834742e+03
|
-40.682744
|
-4.217449e+02
|
7.857206
|
0.940225
|
-22.802668
|
5.658413
|
2.233145
|
unemp
|
0.914942
|
-0.006797
|
-3.015040e+02
|
-1.895048e+02
|
-1.581776e+02
|
-5.058225
|
-1.088638e+02
|
2.210069
|
-1.089027e+01
|
0.940225
|
2.127439
|
0.487778
|
0.308501
|
0.632511
|
pop
|
547.621880
|
0.348573
|
1.194029e+05
|
8.566240e+04
|
2.095227e+04
|
4539.121687
|
9.009419e+04
|
2268.750456
|
1.666849e+04
|
-22.802668
|
0.487778
|
1398.045724
|
-19.926368
|
-2.014960
|
infl
|
-6.729093
|
-0.008926
|
-2.006862e+03
|
-1.496161e+03
|
-3.662567e+02
|
-135.480431
|
-1.471849e+03
|
-44.328471
|
-3.958667e+02
|
5.658413
|
0.308501
|
-19.926368
|
10.583418
|
-4.870398
|
realint
|
-0.093007
|
0.065246
|
-4.160615e+02
|
-3.674486e+02
|
-7.250535e+01
|
18.526286
|
-3.158503e+02
|
4.689722
|
-1.863826e+01
|
2.233145
|
0.632511
|
-2.014960
|
-4.870398
|
7.122486
|
mac.corr()#相关系数
|
year
|
quarter
|
realgdp
|
realcons
|
realinv
|
realgovt
|
realdpi
|
cpi
|
m1
|
tbilrate
|
unemp
|
pop
|
infl
|
realint
|
year
|
1.000000
|
-0.011356
|
0.984331
|
0.979991
|
0.942204
|
0.861450
|
0.986156
|
0.988215
|
0.973572
|
-0.174020
|
0.042711
|
0.997223
|
-0.140837
|
-0.002373
|
quarter
|
-0.011356
|
1.000000
|
0.006082
|
0.005494
|
0.007072
|
0.012144
|
0.005984
|
0.005042
|
0.005237
|
0.011246
|
-0.004166
|
0.008334
|
-0.002453
|
0.021856
|
realgdp
|
0.984331
|
0.006082
|
1.000000
|
0.999229
|
0.977708
|
0.868651
|
0.999008
|
0.986524
|
0.978696
|
-0.275745
|
-0.064297
|
0.993297
|
-0.191880
|
-0.048492
|
realcons
|
0.979991
|
0.005494
|
0.999229
|
1.000000
|
0.976290
|
0.870713
|
0.999091
|
0.983735
|
0.976946
|
-0.294004
|
-0.056163
|
0.990350
|
-0.198804
|
-0.059517
|
realinv
|
0.942204
|
0.007072
|
0.977708
|
0.976290
|
1.000000
|
0.794797
|
0.971564
|
0.947084
|
0.933281
|
-0.273622
|
-0.185347
|
0.957720
|
-0.192416
|
-0.046433
|
realgovt
|
0.861450
|
0.012144
|
0.868651
|
0.870713
|
0.794797
|
1.000000
|
0.872241
|
0.882714
|
0.890436
|
-0.302875
|
-0.024619
|
0.861811
|
-0.295641
|
0.049280
|
realdpi
|
0.986156
|
0.005984
|
0.999008
|
0.999091
|
0.971564
|
0.872241
|
1.000000
|
0.986721
|
0.977752
|
-0.270082
|
-0.030797
|
0.994238
|
-0.186683
|
-0.048834
|
cpi
|
0.988215
|
0.005042
|
0.986524
|
0.983735
|
0.947084
|
0.882714
|
0.986721
|
1.000000
|
0.988812
|
-0.236846
|
0.024727
|
0.990182
|
-0.222361
|
0.028676
|
m1
|
0.973572
|
0.005237
|
0.978696
|
0.976946
|
0.933281
|
0.890436
|
0.977752
|
0.988812
|
1.000000
|
-0.330426
|
-0.016397
|
0.979025
|
-0.267236
|
-0.015337
|
tbilrate
|
-0.174020
|
0.011246
|
-0.275745
|
-0.294004
|
-0.273622
|
-0.302875
|
-0.270082
|
-0.236846
|
-0.330426
|
1.000000
|
0.229969
|
-0.217566
|
0.620508
|
0.298516
|
unemp
|
0.042711
|
-0.004166
|
-0.064297
|
-0.056163
|
-0.185347
|
-0.024619
|
-0.030797
|
0.024727
|
-0.016397
|
0.229969
|
1.000000
|
0.008944
|
0.065015
|
0.162489
|
pop
|
0.997223
|
0.008334
|
0.993297
|
0.990350
|
0.957720
|
0.861811
|
0.994238
|
0.990182
|
0.979025
|
-0.217566
|
0.008944
|
1.000000
|
-0.163815
|
-0.020192
|
infl
|
-0.140837
|
-0.002453
|
-0.191880
|
-0.198804
|
-0.192416
|
-0.295641
|
-0.186683
|
-0.222361
|
-0.267236
|
0.620508
|
0.065015
|
-0.163815
|
1.000000
|
-0.560965
|
realint
|
-0.002373
|
0.021856
|
-0.048492
|
-0.059517
|
-0.046433
|
0.049280
|
-0.048834
|
0.028676
|
-0.015337
|
0.298516
|
0.162489
|
-0.020192
|
-0.560965
|
1.000000
|
mac['cpi'].value_counts(ascending = True,bins=4)#升序,bins:平均分成4份
(76.388, 123.795] 38
(171.203, 218.61] 38
(123.795, 171.203] 44
(28.788999999999998, 76.388] 83
Name: cpi, dtype: int64
3. 对象操作
3.1 Dataframe结构
import pandas as pa#导入库
#Dataframe结构
data = [[1,2,3],[4,5,6]]
index = ['a','b']
columns = ['A','B','C']df = pa.DataFrame(data=data,index=index,columns = columns)
df
增:df.loc()/pd.concat()(练接两个dataframe)/直接添加;删:df.drop()/del df[]/;改:通过loc和index重新定义;查:iloc/loc/索引;
df.loc['c']=[7,8,9]#增加行
df
|
A
|
B
|
C
|
a
|
1
|
2
|
3
|
b
|
4
|
5
|
6
|
D
|
7
|
7
|
7
|
c
|
7
|
8
|
9
|
df.drop(['D'],axis=0,inplace=True)#删除行
df
|
A
|
B
|
C
|
a
|
1
|
2
|
3
|
b
|
4
|
5
|
6
|
c
|
7
|
8
|
9
|
df.loc['a']['B']=20#改
df
|
A
|
B
|
C
|
a
|
1
|
20
|
3
|
b
|
4
|
5
|
6
|
c
|
7
|
8
|
9
|
df.loc['b2']#查行
B 5
C 6
D 200
Name: b2, dtype: int64
df.iloc[2]
B 8
C 9
D 300
Name: c3, dtype: int64
df['D']=[100,200,300]#增加列
df
|
A
|
B
|
C
|
D
|
a
|
1
|
20
|
3
|
100
|
b
|
4
|
5
|
6
|
200
|
c
|
7
|
8
|
9
|
300
|
del df['A']#删除列
df
|
B
|
C
|
D
|
a
|
20
|
3
|
100
|
b
|
5
|
6
|
200
|
c
|
8
|
9
|
300
|
df.index=['a1','b2','c3']#修改索引
df
|
B
|
C
|
D
|
a1
|
20
|
3
|
100
|
b2
|
5
|
6
|
200
|
c3
|
8
|
9
|
300
|
df['D']#查列
a1 100
b2 200
c3 300
Name: D, dtype: int64
data = [[10,20,30],[40,50,60],[70,80,90]]
index = ['j','k','l']
columns = ['B','C','D']df1 = pa.DataFrame(data=data,index=index,columns = columns)
df1
|
B
|
C
|
D
|
j
|
10
|
20
|
30
|
k
|
40
|
50
|
60
|
l
|
70
|
80
|
90
|
df2=pa.concat([df,df1])#拼接
df2
|
B
|
C
|
D
|
a1
|
20
|
3
|
100
|
b2
|
5
|
6
|
200
|
c3
|
8
|
9
|
300
|
j
|
10
|
20
|
30
|
k
|
40
|
50
|
60
|
l
|
70
|
80
|
90
|
df2=pa.concat([df,df1],axis=1)
df2
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.To accept the future behavior, pass 'sort=False'.To retain the current behavior and silence the warning, pass 'sort=True'."""Entry point for launching an IPython kernel.
|
B
|
C
|
D
|
B
|
C
|
D
|
a1
|
20.0
|
3.0
|
100.0
|
NaN
|
NaN
|
NaN
|
b2
|
5.0
|
6.0
|
200.0
|
NaN
|
NaN
|
NaN
|
c3
|
8.0
|
9.0
|
300.0
|
NaN
|
NaN
|
NaN
|
j
|
NaN
|
NaN
|
NaN
|
10.0
|
20.0
|
30.0
|
k
|
NaN
|
NaN
|
NaN
|
40.0
|
50.0
|
60.0
|
l
|
NaN
|
NaN
|
NaN
|
70.0
|
80.0
|
90.0
|
left = pa.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']})
right = pa.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']})
|
key
|
A
|
B
|
0
|
K0
|
A0
|
B0
|
1
|
K1
|
A1
|
B1
|
2
|
K2
|
A2
|
B2
|
3
|
K3
|
A3
|
B3
|
merge=pa.merge(left,right)
merge
|
key
|
A
|
B
|
C
|
D
|
0
|
K0
|
A0
|
B0
|
C0
|
D0
|
1
|
K1
|
A1
|
B1
|
C1
|
D1
|
2
|
K2
|
A2
|
B2
|
C2
|
D2
|
3
|
K3
|
A3
|
B3
|
C3
|
D3
|
merge=pa.merge(left,right,on='key')
merge
|
key
|
A
|
B
|
C
|
D
|
0
|
K0
|
A0
|
B0
|
C0
|
D0
|
1
|
K1
|
A1
|
B1
|
C1
|
D1
|
2
|
K2
|
A2
|
B2
|
C2
|
D2
|
3
|
K3
|
A3
|
B3
|
C3
|
D3
|
3.2 series结构
data = [10,11,12]
index = ['a','b','c']
s = pa.Series(data = data,index = index)#Sersies结构
s
a 10
b 11
c 12
dtype: int64
data = [100,110]
index = ['h','k']
s2 = pa.Series(data = data,index = index)
s3=s.append(s2)#增:append()
s3
a 10
b 11
c 12
h 100
k 110
dtype: int64
s3['L']=900#直接添加
s3
a 10
b 11
c 12
h 100
k 110
L 900
dtype: int64
del s2['h']#删除一行
s2
k 110
dtype: int64
s.drop(['b','a'],inplace = True)#删除多行用drop()
s
c 12
dtype: int64
s3.replace(to_replace = 100,value = 101,inplace = True)#改,replace()、修改索引rename()
s3
a 10
b 11
c 12
h 101
k 110
L 900
dtype: int64
s3.rename(index = {'a':'A'},inplace = True)
s3
A 10
b 11
c 12
h 101
k 110
L 900
dtype: int64
s3.loc['b']#查:可用索引/loc/Iloc等查找定位
11
s3.iloc[2]
Pandas基础学习入门级别相关推荐
- Python科学计算之Pandas基础学习
Python科学计算之Pandas基础学习 导入Pandas 我们首先要导入我们的演出明星--Pandas. 这是导入Pandas的标准方式.显然,我们不希望每时每刻都在程序中写'pandas',但是 ...
- pandas基础学习笔记(简略版)
pandas基础学习笔记(简略版) 1.DataFrame 2.series 3.基本数据操作 4.DataFrame 运算 5.pandas绘图 6.文本的读取与存储 1.DataFrame 既有行 ...
- html零基础学习入门,nlp网站例:HTML零基础入门-HTML学习大纲.doc
nlp网站例:HTML零基础入门-HTML学习大纲 一.课程目标 掌握HTML语言,能够直接编写网页程序: 学会使用层叠样式表技术: 了解最常用的客户端脚本语言JAVASCRIPT,能够编写客户端的常 ...
- 干货丨FPGA零基础学习,入门必看!
看到不少同学后台进行提问:FPGA如何入门?怎么学习?其实对于新人来说,FPGA的学习需要了解的东西还是非常多,下面IC修真院就带大家一起来了解一下吧. FPGA简介 FPGA普遍用于实现数字电路模块 ...
- JAVA基础学习入门
JAVA入门之基本语法(一) 最近开始复习JAVA语言为找工作做准备,方便以后再次复习或者用到的时候回顾.先是从JAVA的基本语法学起,因为有c和c++语言的基础知识铺垫,所以这部分的笔记会比较粗略, ...
- linux 基础学习入门 5 inode 总结 tr tee 等小命令
linux day 5 一.inode是什么? 理解inode,要从文件储存说起. 文件储存在硬盘上,硬盘的最小存储单位叫做"扇区"(Sector).每个扇区储存512字节(相当于 ...
- Python 基础学习 --入门(了解小常识)
计算机简介: 计算机是什么? 用来计算的机器(eg:电脑.笔记本.手机.汽车导航.智能电视......) 目前来讲,计算机只能根据人类的指令完成各种操作.学习计算机,就是学会如何去控制它. 计算机的组 ...
- linux 基础学习入门 2
菜鸟回忆 linux day 2 AM: 物理终端:直接介入本机的显示器和键盘设备 /dev/consol 虚拟终端:附加在物理终端之上的以软件方式虚拟实现的终端,设备文件路径 /dev/tty#,C ...
- 数据挖掘-3.Pandas基础
文章目录 Pandas基础 学习目标 1Pandas介绍 学习目标 1 Pandas介绍 2 为什么使用Pandas 3 小结 2 Pandas数据结构 学习目标 1.Series 1.1 Serie ...
最新文章
- PAT 1074 Reversing Linked List
- python 函数的参数
- linux——grep 文本过滤器
- PHPStorm 配置远程服务器文件夹在本地windows镜像,实现代码自动同步(类似于Samba架构文件同步功能)
- mysql大项目:新闻管理系统
- SD-WAN技术的详细解析
- ubuntu中安装sqldeveloper和JDK 1.7
- [转载] python 字符串切片_Python字符串
- macos big sur 11 完整离线安装包v11.5.2正式版
- 【面试题系列|前端面试题】前端高频面试题总结(2021年最新版)
- jrtplib的使用
- 泛函分析 06.03 线性算子的谱理论 - 有界自共轭线性算子的谱
- 基于Caffe ResNet-50网络实现图片分类(仅推理)的实验复现
- file-saver blob前端导出excel文件
- unsw计算机专业排名,新南威尔士大学计算机专业竟然这么厉害!详解新南威尔士大学计算机专业!...
- 【Android】软件开发中遇到的LUT
- 【PWM】从stm32到pwm到OLED屏幕调光到晚上不要玩手机
- Wpf大屏软件开发过程中遇到的若干问题
- GOPS2018 | 华为云运维最佳CP引领AIOps新风向
- 树莓派人脸/ic卡识别门禁系统
热门文章
- c语言幸运数字程序设计,c语言实现:4和7幸运数字的题
- 布隆过滤器原理及实践
- 苹果电脑快捷键找不到了怎么办?一步简单高效的方法请收好
- psf python_python-从图像堆叠星型PSF;对齐亚像素中心
- 如何阅读一本书_03
- 2021-07-04 IP地址与子网掩码
- PyQt5-小案例(复数计算器)
- 一种新颖的PMSM转子初始位置检测
- 《杀死比尔》:解读邪典
- [FPGA入门笔记](四):倍频器和分频器的实现