08 Scikit-learn中的Scaler
import numpy as np
from sklearn import datasets
import datetime
print(datetime.datetime.now())
2022-01-16 23:05:58.735705
iris = datasets.load_iris()
X = iris.data
y = iris.target
X[:10,:]
array([[5.1, 3.5, 1.4, 0.2],[4.9, 3. , 1.4, 0.2],[4.7, 3.2, 1.3, 0.2],[4.6, 3.1, 1.5, 0.2],[5. , 3.6, 1.4, 0.2],[5.4, 3.9, 1.7, 0.4],[4.6, 3.4, 1.4, 0.3],[5. , 3.4, 1.5, 0.2],[4.4, 2.9, 1.4, 0.2],[4.9, 3.1, 1.5, 0.1]])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=666)
scikit-learn中的StandardScaler
from sklearn.preprocessing import StandardScaler
standardScalar = StandardScaler()
standardScalar.fit(X_train)
StandardScaler()
standardScalar.mean_
array([5.83416667, 3.08666667, 3.70833333, 1.17      ])
standardScalar.scale_
array([0.81019502, 0.44327067, 1.76401924, 0.75317107])
standardScalar.transform(X_train)
array([[-0.90616043,  0.93246262, -1.30856471, -1.28788802],[-1.15301457, -0.19551636, -1.30856471, -1.28788802],[-0.16559799, -0.64670795,  0.22203084,  0.17260355],[ 0.45153738,  0.70686683,  0.95898425,  1.50032315],[-0.90616043, -1.32349533, -0.40154513, -0.09294037],[ 1.43895396,  0.25567524,  0.56216318,  0.30537551],[ 0.3281103 , -1.09789954,  1.0723617 ,  0.30537551],[ 2.1795164 , -0.19551636,  1.63924894,  1.23477923],[-0.78273335,  2.2860374 , -1.25187599, -1.42065998],[ 0.45153738, -2.00028272,  0.44878573,  0.43814747],[ 1.80923518, -0.42111215,  1.46918276,  0.83646335],[ 0.69839152,  0.25567524,  0.90229552,  1.50032315],[ 0.20468323,  0.70686683,  0.44878573,  0.57091943],[-0.78273335, -0.87230374,  0.10865339,  0.30537551],[-0.53587921,  1.38365421, -1.25187599, -1.28788802],[-0.65930628,  1.38365421, -1.25187599, -1.28788802],[-1.0295875 ,  0.93246262, -1.19518726, -0.75680017],[-1.77014994, -0.42111215, -1.30856471, -1.28788802],[-0.04217092, -0.87230374,  0.10865339,  0.03983159],[-0.78273335,  0.70686683, -1.30856471, -1.28788802],[-1.52329579,  0.70686683, -1.30856471, -1.15511606],[ 0.82181859,  0.25567524,  0.78891808,  1.10200727],[-0.16559799, -0.42111215,  0.27871956,  0.17260355],[ 0.94524567, -0.19551636,  0.39209701,  0.30537551],[ 0.20468323, -0.42111215,  0.44878573,  0.43814747],[-1.39986872,  0.25567524, -1.19518726, -1.28788802],[-1.15301457,  1.15805842, -1.30856471, -1.42065998],[ 1.06867274,  0.03007944,  1.0723617 ,  1.63309511],[ 0.57496445, -0.87230374,  0.67554063,  0.83646335],[ 0.3281103 , -0.64670795,  0.56216318,  0.03983159],[ 0.45153738, -0.64670795,  0.6188519 ,  0.83646335],[-0.16559799,  2.96282478, -1.25187599, -1.0223441 ],[ 0.57496445, -1.32349533,  0.67554063,  0.43814747],[ 0.69839152, -0.42111215,  0.33540828,  0.17260355],[-0.90616043,  1.60925001, -1.02512109, -1.0223441 ],[ 1.19209981, -0.64670795,  0.6188519 ,  0.30537551],[-0.90616043,  0.93246262, -1.30856471, -1.15511606],[-1.89357701, -0.19551636, 
-1.47863088, -1.42065998],[ 0.08125616, -0.19551636,  0.78891808,  0.83646335],[ 0.69839152, -0.64670795,  1.0723617 ,  1.23477923],[-0.28902506, -0.64670795,  0.67554063,  1.10200727],[-0.41245214, -1.54909113, -0.00472406, -0.22571233],[ 1.31552689,  0.03007944,  0.67554063,  0.43814747],[ 0.57496445,  0.70686683,  1.0723617 ,  1.63309511],[ 0.82181859, -0.19551636,  1.18573914,  1.36755119],[-0.16559799,  1.60925001, -1.13849854, -1.15511606],[ 0.94524567, -0.42111215,  0.50547446,  0.17260355],[ 1.06867274,  0.48127103,  1.12905042,  1.76586707],[-1.27644165, -0.19551636, -1.30856471, -1.42065998],[-1.0295875 ,  1.15805842, -1.30856471, -1.28788802],[ 0.20468323, -0.19551636,  0.6188519 ,  0.83646335],[-1.0295875 , -0.19551636, -1.19518726, -1.28788802],[ 0.3281103 , -0.19551636,  0.67554063,  0.83646335],[ 0.69839152,  0.03007944,  1.01567297,  0.83646335],[-0.90616043,  1.38365421, -1.25187599, -1.0223441 ],[-0.16559799, -0.19551636,  0.27871956,  0.03983159],[-1.0295875 ,  0.93246262, -1.36525344, -1.15511606],[-0.90616043,  1.60925001, -1.25187599, -1.15511606],[-1.52329579,  0.25567524, -1.30856471, -1.28788802],[-0.53587921, -0.19551636,  0.44878573,  0.43814747],[ 0.82181859, -0.64670795,  0.50547446,  0.43814747],[ 0.3281103 , -0.64670795,  0.16534211,  0.17260355],[-1.27644165,  0.70686683, -1.19518726, -1.28788802],[-0.90616043,  0.48127103, -1.13849854, -0.88957213],[-0.04217092, -0.87230374,  0.78891808,  0.96923531],[-0.28902506, -0.19551636,  0.22203084,  0.17260355],[ 0.57496445, -0.64670795,  0.78891808,  0.43814747],[ 1.06867274,  0.48127103,  1.12905042,  1.23477923],[ 1.68580811, -0.19551636,  1.18573914,  0.57091943],[ 1.06867274, -0.19551636,  0.8456068 ,  1.50032315],[-1.15301457,  0.03007944, -1.25187599, -1.42065998],[-1.15301457, -1.32349533,  0.44878573,  0.70369139],[-0.16559799, -1.32349533,  0.73222935,  1.10200727],[-1.15301457, -1.54909113, -0.23147896, -0.22571233],[-0.41245214, -1.54909113,  0.05196466, -0.09294037],[ 
1.06867274, -1.32349533,  1.18573914,  0.83646335],[ 0.82181859, -0.19551636,  1.01567297,  0.83646335],[-0.16559799, -1.09789954, -0.11810151, -0.22571233],[ 0.20468323, -2.00028272,  0.73222935,  0.43814747],[ 1.06867274,  0.03007944,  0.56216318,  0.43814747],[-1.15301457,  0.03007944, -1.25187599, -1.28788802],[ 0.57496445, -1.32349533,  0.73222935,  0.96923531],[-1.39986872,  0.25567524, -1.36525344, -1.28788802],[ 0.20468323, -0.87230374,  0.78891808,  0.57091943],[-0.04217092, -1.09789954,  0.16534211,  0.03983159],[ 1.31552689,  0.25567524,  1.12905042,  1.50032315],[-1.77014994, -0.19551636, -1.36525344, -1.28788802],[ 1.56238103, -0.19551636,  1.24242787,  1.23477923],[ 1.19209981,  0.25567524,  1.24242787,  1.50032315],[-0.78273335,  0.93246262, -1.25187599, -1.28788802],[ 2.54979762,  1.60925001,  1.52587149,  1.10200727],[ 0.69839152, -0.64670795,  1.0723617 ,  1.36755119],[-0.28902506, -0.42111215, -0.06141278,  0.17260355],[-0.41245214,  2.51163319, -1.30856471, -1.28788802],[-1.27644165, -0.19551636, -1.30856471, -1.15511606],[ 0.57496445, -0.42111215,  1.0723617 ,  0.83646335],[-1.77014994,  0.25567524, -1.36525344, -1.28788802],[-0.53587921,  1.8348458 , -1.13849854, -1.0223441 ],[-1.0295875 ,  0.70686683, -1.19518726, -1.0223441 ],[ 1.06867274, -0.19551636,  0.73222935,  0.70369139],[-0.53587921,  1.8348458 , -1.36525344, -1.0223441 ],[ 2.30294347, -0.64670795,  1.69593766,  1.10200727],[-0.28902506, -0.87230374,  0.27871956,  0.17260355],[ 1.19209981, -0.19551636,  1.01567297,  1.23477923],[-0.41245214,  0.93246262, -1.36525344, -1.28788802],[-1.27644165,  0.70686683, -1.02512109, -1.28788802],[-0.53587921,  0.70686683, -1.13849854, -1.28788802],[ 2.30294347,  1.60925001,  1.69593766,  1.36755119],[ 1.31552689,  0.03007944,  0.95898425,  1.23477923],[-0.28902506, -1.32349533,  0.10865339, -0.09294037],[-0.90616043,  0.70686683, -1.25187599, -1.28788802],[-0.90616043,  1.60925001, -1.19518726, -1.28788802],[ 0.3281103 , -0.42111215,  0.56216318,  
0.30537551],[-0.04217092,  2.0604416 , -1.42194216, -1.28788802],[-1.0295875 , -2.45147431, -0.11810151, -0.22571233],[ 0.69839152,  0.25567524,  0.44878573,  0.43814747],[ 0.3281103 , -0.19551636,  0.50547446,  0.30537551],[ 0.08125616,  0.25567524,  0.6188519 ,  0.83646335],[ 0.20468323, -2.00028272,  0.16534211, -0.22571233],[ 1.93266225, -0.64670795,  1.35580532,  0.96923531]])
X_train[:10,:]
array([[5.1, 3.5, 1.4, 0.2],[4.9, 3. , 1.4, 0.2],[5.7, 2.8, 4.1, 1.3],[6.2, 3.4, 5.4, 2.3],[5.1, 2.5, 3. , 1.1],[7. , 3.2, 4.7, 1.4],[6.1, 2.6, 5.6, 1.4],[7.6, 3. , 6.6, 2.1],[5.2, 4.1, 1.5, 0.1],[6.2, 2.2, 4.5, 1.5]])
X_train = standardScalar.transform(X_train)
X_train[:10,:]
array([[-0.90616043,  0.93246262, -1.30856471, -1.28788802],[-1.15301457, -0.19551636, -1.30856471, -1.28788802],[-0.16559799, -0.64670795,  0.22203084,  0.17260355],[ 0.45153738,  0.70686683,  0.95898425,  1.50032315],[-0.90616043, -1.32349533, -0.40154513, -0.09294037],[ 1.43895396,  0.25567524,  0.56216318,  0.30537551],[ 0.3281103 , -1.09789954,  1.0723617 ,  0.30537551],[ 2.1795164 , -0.19551636,  1.63924894,  1.23477923],[-0.78273335,  2.2860374 , -1.25187599, -1.42065998],[ 0.45153738, -2.00028272,  0.44878573,  0.43814747]])
X_test_standard = standardScalar.transform(X_test)
X_test_standard[:10,:]
array([[-0.28902506, -0.19551636,  0.44878573,  0.43814747],[-0.04217092, -0.64670795,  0.78891808,  1.63309511],[-1.0295875 , -1.77468693, -0.23147896, -0.22571233],[-0.04217092, -0.87230374,  0.78891808,  0.96923531],[-1.52329579,  0.03007944, -1.25187599, -1.28788802],[-0.41245214, -1.32349533,  0.16534211,  0.17260355],[-0.16559799, -0.64670795,  0.44878573,  0.17260355],[ 0.82181859, -0.19551636,  0.8456068 ,  1.10200727],[ 0.57496445, -1.77468693,  0.39209701,  0.17260355],[-0.41245214, -1.09789954,  0.39209701,  0.03983159]])
使用归一化后的数据进行knn分类
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train, y_train)
KNeighborsClassifier(n_neighbors=3)
knn_clf.score(X_test_standard, y_test)
1.0
注意,此时不能传入没有归一化的数据!
knn_clf.score(X_test, y_test)
0.3333333333333333
实现我们自己的standardScaler
代码参见:这里
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=666)
from playML.preprocessing import StandardScaler
my_standardScalar = StandardScaler()
my_standardScalar.fit(X_train)
<playML.preprocessing.StandardScaler at 0x435ce64340>
my_standardScalar.mean_
array([5.83416667, 3.08666667, 3.70833333, 1.17      ])
my_standardScalar.scale_
array([0.81019502, 0.44327067, 1.76401924, 0.75317107])
X_train = my_standardScalar.transform(X_train)
X_train[:10,:]
array([[-0.90616043,  0.93246262, -1.30856471, -1.28788802],[-1.15301457, -0.19551636, -1.30856471, -1.28788802],[-0.16559799, -0.64670795,  0.22203084,  0.17260355],[ 0.45153738,  0.70686683,  0.95898425,  1.50032315],[-0.90616043, -1.32349533, -0.40154513, -0.09294037],[ 1.43895396,  0.25567524,  0.56216318,  0.30537551],[ 0.3281103 , -1.09789954,  1.0723617 ,  0.30537551],[ 2.1795164 , -0.19551636,  1.63924894,  1.23477923],[-0.78273335,  2.2860374 , -1.25187599, -1.42065998],[ 0.45153738, -2.00028272,  0.44878573,  0.43814747]])
Scikit-Learn中的最值归一化
MinMaxScaler: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
练习:同学们也可以尝试实现自己的MinMaxScaler:)

[云炬python3玩转机器学习]sklearn中的Scaler相关推荐

  1. [云炬python3玩转机器学习笔记] 1-3课程所使用的主要技术栈

    课程环境 语言:Python3 框架:Scikit-learn 其他框架:numpy,matplotlib... IDE:Jupyter Notebook,PyCharm,ANACONDA 课程学习基 ...

  2. [云炬python3玩转机器学习笔记] 3-2 Jupyter Notebook魔法命令

    xxxxxxxxxx### %run %run¶ In [1]:%run myscript/hello.py hello Machine Learning ! . . .In [2]:xxxxxxxx ...

  3. [云炬python3玩转机器学习]4-3 训练数据集,测试数据集

    03 测试我们的算法 import numpy as np import matplotlib.pyplot as plt from sklearn import datasets iris = da ...

  4. [云炬python3玩转机器学习笔记] 3-1 Jupyter Notebook

    1+2for _ in range(5):print("Hello, Machine Learning!")5+5*29+9print("天津云炬网络科技有限公司&quo ...

  5. [云炬python3玩转机器学习] 5-9 scikit-learn中的回归问题

    09 scikit-learn中的回归问题 import numpy as np import matplotlib.pyplot as plt from sklearn import dataset ...

  6. [云炬python3玩转机器学习] 6-4 在线性回归模型中使用梯度下降法

    在线性回归模型中使用梯度下降法 In [1]: import numpy as np import matplotlib.pyplot as plt import datetime;print ('R ...

  7. [云炬python3玩转机器学习笔记] 3-8Numpy中的聚合运算

    聚合操作 import numpy as npL=np.random.random(100) L array([6.40912934e-01, 6.68707312e-01, 3.34817109e- ...

  8. [云炬python3玩转机器学习笔记] 3-7Numpy中的矩阵运算

    numpy.array中的运算 给定一个向量,让向量中每一个数乘以2 a=(0,1,2) a*1=(0,2,4) n=10 L=[i for i in range(n)] 2*L [0, 1, 2, ...

  9. [云炬python3玩转机器学习]6-3线性回归中的梯度下降法

最新文章

  1. python五十四:isinstance和issubclass
  2. 从老赖们“维权”,看拍拍贷的底色
  3. 为什么判断 n 是否为质数只需除到开平方根就行了?(直接证明)
  4. “财务自由的15个阶段!说说你到哪个阶段了?”
  5. 小白学数据:教你用Python实现简单监督学习算法
  6. 深度优先遍历解决连通域求解问题-python实现
  7. dismiss ios pop效果_iOS 动画框架pop使用方法
  8. Win10提示不是有效的字体文件怎么解决
  9. 黑侠百度URL批量推送程序
  10. 为什么开发人员应该学习 Kubernetes?
  11. freertos nand flash 读取错误_Flash失效小谈
  12. VB.NET中DataGridView控件
  13. 数学建模2015A题(太阳影子定位)
  14. 微信小程序底部导航栏小效果
  15. WINCE ROM 定制大教程
  16. 高德地图3D离线定位
  17. 无盘服务器磁盘4k对齐,4K对齐:Win7磁盘管理分区教程_硬盘_内存硬盘技巧-中关村在线...
  18. deepinV20 显卡驱动 cuda10.2+cudnn配置
  19. android6.0 PowerManagerService状态分析
  20. html 拓扑图 开源,GitHub - pylixm/zJTopo: 开源拓扑图工具类jTopo的扩展,jtopo是一个不错的拓扑图,基于html5 canvas,功能强大...

热门文章

  1. 我的自我介绍以及决心书
  2. Maven中settings.xml的配置项说明
  3. Oracle执行计划顺序
  4. 怎么设置tomcat管理员的用户名和密码
  5. 学习笔记——基本光照模型简单实现
  6. jquery插件分类与编写详细讲解
  7. 安全扫描工具​Nmap引擎理解文档
  8. 嵌入式Linux中I2C设备驱动程序的研究与实现
  9. 如何解决安装瑞星后用FoxMail收发邮件速度慢的问题。
  10. hdu 1394(树状数组求逆序数)