[云炬python3玩转机器学习]sklearn中的Scaler
08 Scikit-learn中的Scaler
import numpy as np
from sklearn import datasets
import datetime
print(datetime.datetime.now())
2022-01-16 23:05:58.735705
iris = datasets.load_iris()
X = iris.data
y = iris.target
X[:10,:]
array([[5.1, 3.5, 1.4, 0.2],[4.9, 3. , 1.4, 0.2],[4.7, 3.2, 1.3, 0.2],[4.6, 3.1, 1.5, 0.2],[5. , 3.6, 1.4, 0.2],[5.4, 3.9, 1.7, 0.4],[4.6, 3.4, 1.4, 0.3],[5. , 3.4, 1.5, 0.2],[4.4, 2.9, 1.4, 0.2],[4.9, 3.1, 1.5, 0.1]])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=666)
scikit-learn中的StandardScaler
from sklearn.preprocessing import StandardScaler
standardScalar = StandardScaler()
standardScalar.fit(X_train)
StandardScaler()
standardScalar.mean_
array([5.83416667, 3.08666667, 3.70833333, 1.17 ])
standardScalar.scale_
array([0.81019502, 0.44327067, 1.76401924, 0.75317107])
standardScalar.transform(X_train)
array([[-0.90616043, 0.93246262, -1.30856471, -1.28788802],[-1.15301457, -0.19551636, -1.30856471, -1.28788802],[-0.16559799, -0.64670795, 0.22203084, 0.17260355],[ 0.45153738, 0.70686683, 0.95898425, 1.50032315],[-0.90616043, -1.32349533, -0.40154513, -0.09294037],[ 1.43895396, 0.25567524, 0.56216318, 0.30537551],[ 0.3281103 , -1.09789954, 1.0723617 , 0.30537551],[ 2.1795164 , -0.19551636, 1.63924894, 1.23477923],[-0.78273335, 2.2860374 , -1.25187599, -1.42065998],[ 0.45153738, -2.00028272, 0.44878573, 0.43814747],[ 1.80923518, -0.42111215, 1.46918276, 0.83646335],[ 0.69839152, 0.25567524, 0.90229552, 1.50032315],[ 0.20468323, 0.70686683, 0.44878573, 0.57091943],[-0.78273335, -0.87230374, 0.10865339, 0.30537551],[-0.53587921, 1.38365421, -1.25187599, -1.28788802],[-0.65930628, 1.38365421, -1.25187599, -1.28788802],[-1.0295875 , 0.93246262, -1.19518726, -0.75680017],[-1.77014994, -0.42111215, -1.30856471, -1.28788802],[-0.04217092, -0.87230374, 0.10865339, 0.03983159],[-0.78273335, 0.70686683, -1.30856471, -1.28788802],[-1.52329579, 0.70686683, -1.30856471, -1.15511606],[ 0.82181859, 0.25567524, 0.78891808, 1.10200727],[-0.16559799, -0.42111215, 0.27871956, 0.17260355],[ 0.94524567, -0.19551636, 0.39209701, 0.30537551],[ 0.20468323, -0.42111215, 0.44878573, 0.43814747],[-1.39986872, 0.25567524, -1.19518726, -1.28788802],[-1.15301457, 1.15805842, -1.30856471, -1.42065998],[ 1.06867274, 0.03007944, 1.0723617 , 1.63309511],[ 0.57496445, -0.87230374, 0.67554063, 0.83646335],[ 0.3281103 , -0.64670795, 0.56216318, 0.03983159],[ 0.45153738, -0.64670795, 0.6188519 , 0.83646335],[-0.16559799, 2.96282478, -1.25187599, -1.0223441 ],[ 0.57496445, -1.32349533, 0.67554063, 0.43814747],[ 0.69839152, -0.42111215, 0.33540828, 0.17260355],[-0.90616043, 1.60925001, -1.02512109, -1.0223441 ],[ 1.19209981, -0.64670795, 0.6188519 , 0.30537551],[-0.90616043, 0.93246262, -1.30856471, -1.15511606],[-1.89357701, -0.19551636, -1.47863088, -1.42065998],[ 0.08125616, -0.19551636, 0.78891808, 
0.83646335],[ 0.69839152, -0.64670795, 1.0723617 , 1.23477923],[-0.28902506, -0.64670795, 0.67554063, 1.10200727],[-0.41245214, -1.54909113, -0.00472406, -0.22571233],[ 1.31552689, 0.03007944, 0.67554063, 0.43814747],[ 0.57496445, 0.70686683, 1.0723617 , 1.63309511],[ 0.82181859, -0.19551636, 1.18573914, 1.36755119],[-0.16559799, 1.60925001, -1.13849854, -1.15511606],[ 0.94524567, -0.42111215, 0.50547446, 0.17260355],[ 1.06867274, 0.48127103, 1.12905042, 1.76586707],[-1.27644165, -0.19551636, -1.30856471, -1.42065998],[-1.0295875 , 1.15805842, -1.30856471, -1.28788802],[ 0.20468323, -0.19551636, 0.6188519 , 0.83646335],[-1.0295875 , -0.19551636, -1.19518726, -1.28788802],[ 0.3281103 , -0.19551636, 0.67554063, 0.83646335],[ 0.69839152, 0.03007944, 1.01567297, 0.83646335],[-0.90616043, 1.38365421, -1.25187599, -1.0223441 ],[-0.16559799, -0.19551636, 0.27871956, 0.03983159],[-1.0295875 , 0.93246262, -1.36525344, -1.15511606],[-0.90616043, 1.60925001, -1.25187599, -1.15511606],[-1.52329579, 0.25567524, -1.30856471, -1.28788802],[-0.53587921, -0.19551636, 0.44878573, 0.43814747],[ 0.82181859, -0.64670795, 0.50547446, 0.43814747],[ 0.3281103 , -0.64670795, 0.16534211, 0.17260355],[-1.27644165, 0.70686683, -1.19518726, -1.28788802],[-0.90616043, 0.48127103, -1.13849854, -0.88957213],[-0.04217092, -0.87230374, 0.78891808, 0.96923531],[-0.28902506, -0.19551636, 0.22203084, 0.17260355],[ 0.57496445, -0.64670795, 0.78891808, 0.43814747],[ 1.06867274, 0.48127103, 1.12905042, 1.23477923],[ 1.68580811, -0.19551636, 1.18573914, 0.57091943],[ 1.06867274, -0.19551636, 0.8456068 , 1.50032315],[-1.15301457, 0.03007944, -1.25187599, -1.42065998],[-1.15301457, -1.32349533, 0.44878573, 0.70369139],[-0.16559799, -1.32349533, 0.73222935, 1.10200727],[-1.15301457, -1.54909113, -0.23147896, -0.22571233],[-0.41245214, -1.54909113, 0.05196466, -0.09294037],[ 1.06867274, -1.32349533, 1.18573914, 0.83646335],[ 0.82181859, -0.19551636, 1.01567297, 0.83646335],[-0.16559799, -1.09789954, 
-0.11810151, -0.22571233],[ 0.20468323, -2.00028272, 0.73222935, 0.43814747],[ 1.06867274, 0.03007944, 0.56216318, 0.43814747],[-1.15301457, 0.03007944, -1.25187599, -1.28788802],[ 0.57496445, -1.32349533, 0.73222935, 0.96923531],[-1.39986872, 0.25567524, -1.36525344, -1.28788802],[ 0.20468323, -0.87230374, 0.78891808, 0.57091943],[-0.04217092, -1.09789954, 0.16534211, 0.03983159],[ 1.31552689, 0.25567524, 1.12905042, 1.50032315],[-1.77014994, -0.19551636, -1.36525344, -1.28788802],[ 1.56238103, -0.19551636, 1.24242787, 1.23477923],[ 1.19209981, 0.25567524, 1.24242787, 1.50032315],[-0.78273335, 0.93246262, -1.25187599, -1.28788802],[ 2.54979762, 1.60925001, 1.52587149, 1.10200727],[ 0.69839152, -0.64670795, 1.0723617 , 1.36755119],[-0.28902506, -0.42111215, -0.06141278, 0.17260355],[-0.41245214, 2.51163319, -1.30856471, -1.28788802],[-1.27644165, -0.19551636, -1.30856471, -1.15511606],[ 0.57496445, -0.42111215, 1.0723617 , 0.83646335],[-1.77014994, 0.25567524, -1.36525344, -1.28788802],[-0.53587921, 1.8348458 , -1.13849854, -1.0223441 ],[-1.0295875 , 0.70686683, -1.19518726, -1.0223441 ],[ 1.06867274, -0.19551636, 0.73222935, 0.70369139],[-0.53587921, 1.8348458 , -1.36525344, -1.0223441 ],[ 2.30294347, -0.64670795, 1.69593766, 1.10200727],[-0.28902506, -0.87230374, 0.27871956, 0.17260355],[ 1.19209981, -0.19551636, 1.01567297, 1.23477923],[-0.41245214, 0.93246262, -1.36525344, -1.28788802],[-1.27644165, 0.70686683, -1.02512109, -1.28788802],[-0.53587921, 0.70686683, -1.13849854, -1.28788802],[ 2.30294347, 1.60925001, 1.69593766, 1.36755119],[ 1.31552689, 0.03007944, 0.95898425, 1.23477923],[-0.28902506, -1.32349533, 0.10865339, -0.09294037],[-0.90616043, 0.70686683, -1.25187599, -1.28788802],[-0.90616043, 1.60925001, -1.19518726, -1.28788802],[ 0.3281103 , -0.42111215, 0.56216318, 0.30537551],[-0.04217092, 2.0604416 , -1.42194216, -1.28788802],[-1.0295875 , -2.45147431, -0.11810151, -0.22571233],[ 0.69839152, 0.25567524, 0.44878573, 0.43814747],[ 0.3281103 , 
-0.19551636, 0.50547446, 0.30537551],[ 0.08125616, 0.25567524, 0.6188519 , 0.83646335],[ 0.20468323, -2.00028272, 0.16534211, -0.22571233],[ 1.93266225, -0.64670795, 1.35580532, 0.96923531]])
X_train[:10,:]
array([[5.1, 3.5, 1.4, 0.2],[4.9, 3. , 1.4, 0.2],[5.7, 2.8, 4.1, 1.3],[6.2, 3.4, 5.4, 2.3],[5.1, 2.5, 3. , 1.1],[7. , 3.2, 4.7, 1.4],[6.1, 2.6, 5.6, 1.4],[7.6, 3. , 6.6, 2.1],[5.2, 4.1, 1.5, 0.1],[6.2, 2.2, 4.5, 1.5]])
X_train = standardScalar.transform(X_train)
X_train[:10,:]
array([[-0.90616043, 0.93246262, -1.30856471, -1.28788802],[-1.15301457, -0.19551636, -1.30856471, -1.28788802],[-0.16559799, -0.64670795, 0.22203084, 0.17260355],[ 0.45153738, 0.70686683, 0.95898425, 1.50032315],[-0.90616043, -1.32349533, -0.40154513, -0.09294037],[ 1.43895396, 0.25567524, 0.56216318, 0.30537551],[ 0.3281103 , -1.09789954, 1.0723617 , 0.30537551],[ 2.1795164 , -0.19551636, 1.63924894, 1.23477923],[-0.78273335, 2.2860374 , -1.25187599, -1.42065998],[ 0.45153738, -2.00028272, 0.44878573, 0.43814747]])
X_test_standard = standardScalar.transform(X_test)
X_test_standard[:10,:]
array([[-0.28902506, -0.19551636, 0.44878573, 0.43814747],[-0.04217092, -0.64670795, 0.78891808, 1.63309511],[-1.0295875 , -1.77468693, -0.23147896, -0.22571233],[-0.04217092, -0.87230374, 0.78891808, 0.96923531],[-1.52329579, 0.03007944, -1.25187599, -1.28788802],[-0.41245214, -1.32349533, 0.16534211, 0.17260355],[-0.16559799, -0.64670795, 0.44878573, 0.17260355],[ 0.82181859, -0.19551636, 0.8456068 , 1.10200727],[ 0.57496445, -1.77468693, 0.39209701, 0.17260355],[-0.41245214, -1.09789954, 0.39209701, 0.03983159]])
使用归一化后的数据进行knn分类
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train, y_train)
KNeighborsClassifier(n_neighbors=3)
knn_clf.score(X_test_standard, y_test)
1.0
注意,此时不能传入没有归一化的数据!
knn_clf.score(X_test, y_test)
0.3333333333333333
实现我们自己的standardScaler
代码参见:这里
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=666)
from playML.preprocessing import StandardScaler
my_standardScalar = StandardScaler()
my_standardScalar.fit(X_train)
<playML.preprocessing.StandardScaler at 0x435ce64340>
my_standardScalar.mean_
array([5.83416667, 3.08666667, 3.70833333, 1.17 ])
my_standardScalar.scale_
array([0.81019502, 0.44327067, 1.76401924, 0.75317107])
X_train = my_standardScalar.transform(X_train)
X_train[:10,:]
array([[-0.90616043, 0.93246262, -1.30856471, -1.28788802],[-1.15301457, -0.19551636, -1.30856471, -1.28788802],[-0.16559799, -0.64670795, 0.22203084, 0.17260355],[ 0.45153738, 0.70686683, 0.95898425, 1.50032315],[-0.90616043, -1.32349533, -0.40154513, -0.09294037],[ 1.43895396, 0.25567524, 0.56216318, 0.30537551],[ 0.3281103 , -1.09789954, 1.0723617 , 0.30537551],[ 2.1795164 , -0.19551636, 1.63924894, 1.23477923],[-0.78273335, 2.2860374 , -1.25187599, -1.42065998],[ 0.45153738, -2.00028272, 0.44878573, 0.43814747]])
Scikit-Learn中的最值归一化
MinMaxScaler: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
练习:同学们也可以尝试实现自己的MinMaxScaler:)
[云炬python3玩转机器学习]sklearn中的Scaler
相关推荐
- [云炬python3玩转机器学习笔记] 1-3课程所使用的主要技术栈
课程环境 语言:Python3 框架:Scikit-learn 其他框架:numpy,matplotlib... IDE:Jupyter Notebook,PyCharm,ANACONDA 课程学习基 ...
- [云炬python3玩转机器学习笔记] 3-2 Jupter Notebook魔法命令
xxxxxxxxxx### %run %run¶ In [1]:%run myscript/hello.py hello Machine Learning ! . . .In [2]:xxxxxxxx ...
- [云炬python3玩转机器学习]4-3 训练数据集,测试数据集
03 测试我们的算法 import numpy as np import matplotlib.pyplot as plt from sklearn import datasets iris = da ...
- [云炬python3玩转机器学习笔记] 3-1 Jupyter Notebook
1+2for _ in range(5):print("Hello, Machine Learning!")5+5*29+9print("天津云炬网络科技有限公司&quo ...
- [云炬python3玩转机器学习] 5-9 scikit-learn中的回归问题
09 scikit-learn中的回归问题 import numpy as np import matplotlib.pyplot as plt from sklearn import dataset ...
- [云炬python3玩转机器学习] 6-4 在线性回归模型中使用梯度下降法
在线性回归模型中使用梯度下降法 In [1]: import numpy as np import matplotlib.pyplot as plt import datetime;print ('R ...
- [云炬python3玩转机器学习笔记] 3-8Numpy中的聚合运算
聚合操作 import numpy as npL=np.random.random(100) L array([6.40912934e-01, 6.68707312e-01, 3.34817109e- ...
- [云炬python3玩转机器学习笔记] 3-7Numpy中的矩阵运算
numpy.array中的运算 给定一个向量,让向量中每一个数乘以2 a=(0,1,2) a*1=(0,2,4) n=10 L=[i for i in range(n)] 2*L [0, 1, 2, ...
- [云炬python3玩转机器学习]6-3线性回归中的梯度下降法
最新文章
- python五十四:isinstance和issubclass
- 从老赖们“维权”,看拍拍贷的底色
- 为什么判断 n 是否为质数只需除到开平方根就行了?(直接证明)
- “财务自由的15个阶段!说说你到哪个阶段了?”
- 小白学数据:教你用Python实现简单监督学习算法
- 深度优先遍历解决连通域求解问题-python实现
- dismiss ios pop效果_iOS 动画框架pop使用方法
- Win10提示不是有效的字体文件怎么解决
- 黑侠百度URL批量推送程序
- 为什么开发人员应该学习 Kubernetes?
- freertos nand flash 读取错误_Flash失效小谈
- VB.NET中DataGridView控件
- 数学建模2015A题(太阳影子定位)
- 微信小程序底部导航栏小效果
- WINCE ROM 定制大教程
- 高德地图3D离线定位
- 无盘服务器磁盘4k对齐,4K对齐:Win7磁盘管理分区教程_硬盘_内存硬盘技巧-中关村在线...
- deepinV20 显卡驱动 cuda10.2+cudnn配置
- android6.0 PowerManagerService状态分析
- html 拓扑图 开源,GitHub - pylixm/zJTopo: 开源拓扑图工具类jTopo的扩展,jtopo是一个不错的拓扑图,基于html5 canvas,功能强大...