


import pandas as pd
# Sum a labelled Series that contains one missing value.
idx = "hello the cruel world".split()
val = [1000, 201, None, 104]  # None is stored as NaN in the Series
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
# Series.sum() skips NaN by default (skipna=True).
print(t.sum())



import pandas as pd
# Mean of a Series with a missing value, with and without NaN skipping.
idx = "hello the cruel world".split()
val = [1000, 201, None, 104]  # None is stored as NaN in the Series
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
# Default skipna=True: the NaN entry is ignored.
print(t.mean())
# skipna=False: the NaN entry propagates into the result.
print(t.mean(skipna=False))



import pandas as pd
# Quantiles of a Series: the default call (median) and the two quartiles.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
print(t.quantile())  # q defaults to 0.5
for q in (0.5, 0.25, 0.75):
    print(t.quantile(q))



import pandas as pd
# Summary statistics (count, mean, std, min, quartiles, max) in one call.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
print(t.describe())


count       4.000000
mean      409.500000
std       404.699477
min       104.000000
25%       176.750000
50%       267.000000
75%       499.750000
max      1000.000000
dtype: float64



import pandas as pd
# Largest value of a Series and the index label at which it occurs.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
print(t.max())
print(t.idxmax())  # label of the maximum, not its position


hello    1000
the       201
cruel     333
world     104
dtype: int64 <- t



  • var函数计算方差,方差Variance反映的是模型每一次输出结果与模型输出期望(平均值)之间的误差,即模型的稳定性,在pandas的series里可以用var函数计算。
import pandas as pd
# Sample variance of a Series via Series.var().
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
print("{} \t<- var".format(t.var()))


hello    1000
the       201
cruel     333
world     104
dtype: int64 <- t
163781.66666666666  <- var



import pandas as pd
import numpy as np
# Reproduce Series.var() by hand: sum of squared deviations over (n - 1).
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
print("{} \t<- var".format(t.var()))
x = val
mu = t.mean()
y = [np.square(v - mu) for v in x]
# Divide by n - 1 (derived from the data rather than hard-coded) to match
# the default ddof=1 used by Series.var().
manual_var = np.sum(y) / (len(x) - 1)
print(manual_var)


hello    1000
the       201
cruel     333
world     104
dtype: int64 <- t
163781.66666666666  <- var

import pandas as pd
import numpy as np
# Reproduce Series.std() by hand: square root of the sample variance.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- t".format(t))
print("{} \t<- var".format(t.var()))
x = val
mu = t.mean()
y = [np.square(v - mu) for v in x]
# n - 1 is derived from the data instead of being hard-coded as 3.
delta2 = np.sum(y) / (len(x) - 1)
print(delta2)
print(np.sqrt(delta2))
print("{} \t<- std".format(t.std()))


hello    1000
the       201
cruel     333
world     104
dtype: int64 <- t
163781.66666666666  <- var
404.6994769784941   <- std

import pandas as pd
import numpy as np
# Mean absolute deviation: average distance of each value from the mean.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
x = val
mu = t.mean()
y = [np.abs(v - mu) for v in x]
# Divide by the number of observations instead of a hard-coded 4.
md = np.sum(y) / len(x)
print(md)
# Series.mad() was deprecated in pandas 1.5 and removed in pandas 2.0, so the
# same quantity is computed with operations available in every version.
mad = (t - t.mean()).abs().mean()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} \t<- mad".format(mad))


295.25  <- mad

import pandas as pd
import numpy as np
# Reproduce Series.cov() by hand: dot product of the two deviation vectors
# divided by (n - 1).
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
x = pd.Series(val, index=idx)
van = [1100, 221, 303, 84]
y = pd.Series(van, index=idx)
xt = val
mux = x.mean()
yt = van
muy = y.mean()
xx = [v - mux for v in xt]  # deviations of x from its mean
yy = [v - muy for v in yt]  # deviations of y from its mean
print(xx)
print(yy)
# The dot product is already a scalar, so no extra np.sum is needed; the
# divisor n - 1 is derived from the data instead of being hard-coded as 3.
manual_cov = np.dot(xx, yy) / (len(xt) - 1)
print(manual_cov)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} \t<- cov".format(x.cov(y)))


[590.5, -208.5, -76.5, -305.5]
[673.0, -206.0, -124.0, -343.0]
184876.66666666666  <- cov

import pandas as pd
import numpy as np
# Reproduce Series.corr() (Pearson) by hand.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
x = pd.Series(val, index=idx)
van = [1100, 221, 303, 84]
y = pd.Series(van, index=idx)
xt = val
mux = x.mean()
yt = van
muy = y.mean()
xx = [v - mux for v in xt]  # deviations of x from its mean
yy = [v - muy for v in yt]  # deviations of y from its mean
xx2 = [np.square(d) for d in xx]
yy2 = [np.square(d) for d in yy]
# `cov` is the un-normalised sum of cross-deviations; the dot product is
# already a scalar, so the original extra np.sum was redundant.
cov = np.dot(xx, yy)
# Product of the un-normalised deviation norms. Any 1/(n - 1) factor would
# appear in both numerator and denominator and cancel, so it is omitted.
muxy = np.sqrt(np.sum(xx2)) * np.sqrt(np.sum(yy2))
print(cov / muxy)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} \t<- corr".format(x.corr(y)))


0.9981491788769461  <- corr

1). 利用kurt计算峰度值。

import pandas as pd
import numpy as np
# Reproduce Series.kurt() (bias-corrected excess kurtosis) by hand.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
x = pd.Series(val, index=idx)
n = len(val)  # derived from the data instead of hard-coded
mu = x.mean()
delta = x.std()
xu = [np.power((v - mu), 4) for v in val]
# G2 = n(n+1) / ((n-1)(n-2)(n-3)) * sum((x - mu)^4) / s^4
#      - 3(n-1)^2 / ((n-2)(n-3))
# The correction term must be divided by the whole product (n-2)(n-3).
# The original `/ (n-2)*(n-3)` multiplied by (n-3) instead, which only gave
# the right answer because n - 3 == 1 for this data set.
scale = (1.0 * n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3))
correction = 3.0 * (n - 1) ** 2 / ((n - 2) * (n - 3))
kurt_manual = scale * np.sum(xu) / delta ** 4 - correction
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <-python".format(kurt_manual))
print("{} <- kurt".format(x.kurt()))


2.93023293658 <-python
2.93023293658 <- kurt

2). skew函数可以计算偏态值。

import pandas as pd
import numpy as np
# Reproduce Series.skew() (bias-corrected sample skewness) by hand.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
x = pd.Series(val, index=idx)
n = len(val)  # derived from the data instead of hard-coded
mu = x.mean()
delta = x.std()
xu = [np.power((v - mu), 3) for v in val]
# G1 = n / ((n-1)(n-2)) * sum((x - mu)^3) / s^3
skew_manual = (1.0 * n) / ((n - 1) * (n - 2)) * np.sum(xu) / delta ** 3
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("{} <- python".format(skew_manual))
print("{} <- skew".format(x.skew()))


1.68850911034 <- python
1.68850911034 <- skew


import pandas as pd
# Cumulative sum, product and minimum along a Series.
idx = "hello the cruel world".split()
val = [1000, 201, 333, 104]
t = pd.Series(val, index=idx)
# The original had two print statements fused onto one line (a syntax error);
# they are separate statements here. Single-argument print(...) emits the
# same text on Python 2 and Python 3.
print("{} <- t".format(t))
print("{} \t<- cumsum".format(t.cumsum()))
print("{} \t<- cumprod".format(t.cumprod()))
print("{} \t<- cummin".format(t.cummin()))


hello    1000
the       201
cruel     333
world     104
dtype: int64 <- t
hello    1000
the      1201
cruel    1534
world    1638
dtype: int64    <- cumsum
hello          1000
the          201000
cruel      66933000
world    6961032000
dtype: int64    <- cumprod
hello    1000
the       201
cruel     201
world     104
dtype: int64    <- cummin


  1. pandas 遍历 series

