15.

import pandas as pd
import numpy as np

# Default location of the Q15 training data on the original author's machine.
TRAIN_PATH = 'F:\\Kaggle\\hw1_15_train.dat'


def get_training_set(path=TRAIN_PATH):
    """Load the training file and build the design matrix and label vector.

    Args:
        path: Whitespace-separated text file; every row holds four feature
            values followed by the label.

    Returns:
        A tuple ``(X_train, y_train, data_size)`` where ``X_train`` has shape
        ``(data_size, 5)`` (a leading column of ones followed by the four
        features), ``y_train`` has shape ``(data_size, 1)`` and ``data_size``
        is the number of rows read.
    """
    # Supplying `names` makes pandas treat the first row as data, not a header.
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    X_train = np.array(data.iloc[:, 0:4])
    data_size = X_train.shape[0]
    # Prepend the constant feature x0 = 1 so w[0] acts as the bias term.
    X_train = np.hstack((np.ones((data_size, 1)), X_train))
    y_train = np.array(data.iloc[:, 4:5])
    return X_train, y_train, data_size


def PLA(path=TRAIN_PATH):
    """Run the perceptron learning algorithm, visiting samples in file order.

    Starting from ``w = 0``, the samples are scanned cyclically and ``w`` is
    corrected on every sample with ``y * (w . x) <= 0``. The loop only stops
    after a full pass without a correction, so it does not terminate when the
    data is not linearly separable.

    Args:
        path: Training file handed to :func:`get_training_set`.

    Returns:
        A tuple ``(num_iters, w)``: the number of corrections made and the
        final weight vector of shape ``(5, 1)``.
    """
    X_train, y_train, _ = get_training_set(path)
    w = np.zeros((5, 1))
    num_iters = 0
    while True:
        corrected = False
        for x_i, y_i in zip(X_train, y_train[:, 0]):
            # A score of exactly zero is treated as a mistake as well.
            if y_i * np.dot(x_i, w[:, 0]) <= 0:
                w[:, 0] += y_i * x_i
                corrected = True
                num_iters += 1
        if not corrected:
            break
    return num_iters, w


if __name__ == '__main__':
    num, w = PLA()
    print(num)

结果为

16.

在15题基础上进行修改,打乱数据序列,使用np.random.permutation(),返回一个新序列而不改变原始数据序列

import pandas as pd
import numpy as np

# Default location of the Q15 training data on the original author's machine.
TRAIN_PATH = 'F:\\Kaggle\\hw1_15_train.dat'


def get_training_set(path=TRAIN_PATH):
    """Load the training file and return it in a freshly shuffled order.

    Args:
        path: Whitespace-separated text file; every row holds four feature
            values followed by the label.

    Returns:
        A tuple ``(X_train, y_train, data_size)`` where ``X_train`` has shape
        ``(data_size, 5)`` (a leading column of ones followed by the four
        features), ``y_train`` has shape ``(data_size, 1)`` and ``data_size``
        is the number of rows read. Rows of X and y are permuted together.
    """
    # Supplying `names` makes pandas treat the first row as data, not a header.
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    X_train = np.array(data.iloc[:, 0:4])
    data_size = X_train.shape[0]
    # Prepend the constant feature x0 = 1 so w[0] acts as the bias term.
    X_train = np.hstack((np.ones((data_size, 1)), X_train))
    y_train = np.array(data.iloc[:, 4:5])
    # permutation() returns a new index order; indexing with it copies the
    # rows, so the arrays built above are not modified in place.
    order = np.random.permutation(data_size)
    return X_train[order], y_train[order], data_size


def PLA(path=TRAIN_PATH):
    """Run the perceptron learning algorithm on a random visiting order.

    Starting from ``w = 0``, the shuffled samples are scanned cyclically and
    ``w`` is corrected on every sample with ``y * (w . x) <= 0``. The loop
    only stops after a full pass without a correction, so it does not
    terminate when the data is not linearly separable.

    Args:
        path: Training file handed to :func:`get_training_set`.

    Returns:
        The number of corrections made before convergence.
    """
    X_train, y_train, _ = get_training_set(path)
    w = np.zeros((5, 1))
    num_iters = 0
    while True:
        corrected = False
        for x_i, y_i in zip(X_train, y_train[:, 0]):
            # A score of exactly zero is treated as a mistake as well.
            if y_i * np.dot(x_i, w[:, 0]) <= 0:
                w[:, 0] += y_i * x_i
                corrected = True
                num_iters += 1
        if not corrected:
            break
    return num_iters


if __name__ == '__main__':
    num_runs = 2000
    total = 0
    for _ in range(num_runs):
        total += PLA()
    print(total / num_runs)

结果为

17.

在16题的基础上加上一个学习速率alpha,修改PLA()函数,更新w时加上学习速率alpha

import pandas as pd
import numpy as np

# Default location of the Q15 training data on the original author's machine.
TRAIN_PATH = 'F:\\Kaggle\\hw1_15_train.dat'


def get_training_set(path=TRAIN_PATH):
    """Load the training file and return it in a freshly shuffled order.

    Args:
        path: Whitespace-separated text file; every row holds four feature
            values followed by the label.

    Returns:
        A tuple ``(X_train, y_train, data_size)`` where ``X_train`` has shape
        ``(data_size, 5)`` (a leading column of ones followed by the four
        features), ``y_train`` has shape ``(data_size, 1)`` and ``data_size``
        is the number of rows read. Rows of X and y are permuted together.
    """
    # Supplying `names` makes pandas treat the first row as data, not a header.
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    X_train = np.array(data.iloc[:, 0:4])
    data_size = X_train.shape[0]
    # Prepend the constant feature x0 = 1 so w[0] acts as the bias term.
    X_train = np.hstack((np.ones((data_size, 1)), X_train))
    y_train = np.array(data.iloc[:, 4:5])
    # permutation() returns a new index order; indexing with it copies the
    # rows, so the arrays built above are not modified in place.
    order = np.random.permutation(data_size)
    return X_train[order], y_train[order], data_size


def PLA(path=TRAIN_PATH, alpha=0.5):
    """Run the perceptron learning algorithm with a learning rate.

    Starting from ``w = 0``, the shuffled samples are scanned cyclically and
    ``w`` is moved by ``alpha * y * x`` on every sample with
    ``y * (w . x) <= 0``. The loop only stops after a full pass without a
    correction, so it does not terminate when the data is not linearly
    separable.

    Args:
        path: Training file handed to :func:`get_training_set`.
        alpha: Step size applied to every correction.

    Returns:
        The number of corrections made before convergence.
    """
    X_train, y_train, _ = get_training_set(path)
    w = np.zeros((5, 1))
    num_iters = 0
    while True:
        corrected = False
        for x_i, y_i in zip(X_train, y_train[:, 0]):
            # A score of exactly zero is treated as a mistake as well.
            if y_i * np.dot(x_i, w[:, 0]) <= 0:
                w[:, 0] += alpha * y_i * x_i
                corrected = True
                num_iters += 1
        if not corrected:
            break
    return num_iters


if __name__ == '__main__':
    num_runs = 2000
    total = 0
    for _ in range(num_runs):
        total += PLA()
    print(total / num_runs)

运行结果为

18.

import pandas as pd
import numpy as np
import copy as cp

# Default data locations on the original author's machine.
TRAIN_PATH = 'F:\\Kaggle\\hw1_18_train.dat'
TEST_PATH = 'F:\\Kaggle\\hw1_18_test.dat'


def get_training_set(path=TRAIN_PATH):
    """Load the training file and return it in a freshly shuffled order.

    Args:
        path: Whitespace-separated text file; every row holds four feature
            values followed by the label.

    Returns:
        A tuple ``(X_train, y_train, data_size)`` where ``X_train`` has shape
        ``(data_size, 5)`` (a leading column of ones followed by the four
        features), ``y_train`` has shape ``(data_size, 1)`` and ``data_size``
        is the number of rows read. Rows of X and y are permuted together.
    """
    # Supplying `names` makes pandas treat the first row as data, not a header.
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    X_train = np.array(data.iloc[:, 0:4])
    data_size = X_train.shape[0]
    # Prepend the constant feature x0 = 1 so w[0] acts as the bias term.
    X_train = np.hstack((np.ones((data_size, 1)), X_train))
    y_train = np.array(data.iloc[:, 4:5])
    order = np.random.permutation(data_size)
    return X_train[order], y_train[order], data_size


def get_test_set(path=TEST_PATH):
    """Load the test file (same layout as the training file, not shuffled).

    Args:
        path: Whitespace-separated text file with four features and a label
            per row.

    Returns:
        A tuple ``(X_val, y_val, test_size)`` with ``X_val`` of shape
        ``(test_size, 5)`` (leading column of ones) and ``y_val`` of shape
        ``(test_size, 1)``.
    """
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    test_set = np.array(data)
    test_size = test_set.shape[0]
    X_val = np.hstack((np.ones((test_size, 1)), test_set[:, 0:4]))
    y_val = test_set[:, 4:5]
    return X_val, y_val, test_size


def _count_errors(X, y, w):
    """Count the rows of X that w misclassifies (a zero score counts as wrong)."""
    scores = X @ np.reshape(w, (-1, 1))
    return int(np.count_nonzero(scores * np.reshape(y, (-1, 1)) <= 0))


def Pocket(updates=50, path=TRAIN_PATH):
    """Run the pocket algorithm for a fixed number of weight updates.

    The shuffled training samples are scanned cyclically. Every mistake moves
    ``w`` by ``0.5 * y * x``; after each move the training error of the new
    ``w`` is measured and ``w`` is kept "in the pocket" when it is strictly
    better than the best seen so far. Scanning continues over further passes
    until ``updates`` corrections were made or a full pass found no mistake.

    Args:
        updates: Number of corrections to perform.
        path: Training file handed to :func:`get_training_set`.

    Returns:
        The pocket weight vector, shape ``(5, 1)``.
    """
    X_train, y_train, data_size = get_training_set(path)
    w = np.zeros((5, 1))
    w_pocket = np.zeros((5, 1))
    # Fewest training mistakes achieved by any w so far.
    best_errors = data_size
    count = 0
    while count < updates:
        corrected = False
        for x_i, y_i in zip(X_train, y_train[:, 0]):
            if y_i * np.dot(x_i, w[:, 0]) > 0:
                continue
            w[:, 0] += 0.5 * y_i * x_i
            count += 1
            corrected = True
            # Decide whether the updated w replaces the pocket.
            errors = _count_errors(X_train, y_train, w)
            if errors < best_errors:
                best_errors = errors
                w_pocket = cp.deepcopy(w)
            if count == updates:
                break
        if not corrected:
            # A clean pass means w already separates the data.
            break
    return w_pocket


def Pocket_Error(w_pocket, path=TEST_PATH):
    """Return the fraction of test samples misclassified by ``w_pocket``.

    Args:
        w_pocket: Weight vector with five entries.
        path: Test file handed to :func:`get_test_set`.

    Returns:
        Number of misclassified test rows divided by the test-set size.
    """
    X_val, y_val, test_size = get_test_set(path)
    error_num = _count_errors(X_val, y_val, w_pocket)
    return error_num / test_size


if __name__ == '__main__':
    num_runs = 2000
    ratio_sum = 0
    for _ in range(num_runs):
        ratio_sum += Pocket_Error(Pocket())
    print(ratio_sum / num_runs)

计算结果为。特么这道题一开始结果做错了,找了一晚上错,最后发现用的是第一题的数据在算第二题!!!_(¦3」∠)_

19.

import pandas as pd
import numpy as np
import copy as cp

# Default data locations on the original author's machine.
TRAIN_PATH = 'F:\\Kaggle\\hw1_18_train.dat'
TEST_PATH = 'F:\\Kaggle\\hw1_18_test.dat'


def get_training_set(path=TRAIN_PATH):
    """Load the training file and return it in a freshly shuffled order.

    Args:
        path: Whitespace-separated text file; every row holds four feature
            values followed by the label.

    Returns:
        A tuple ``(X_train, y_train, data_size)`` where ``X_train`` has shape
        ``(data_size, 5)`` (a leading column of ones followed by the four
        features), ``y_train`` has shape ``(data_size, 1)`` and ``data_size``
        is the number of rows read. Rows of X and y are permuted together.
    """
    # Supplying `names` makes pandas treat the first row as data, not a header.
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    X_train = np.array(data.iloc[:, 0:4])
    data_size = X_train.shape[0]
    # Prepend the constant feature x0 = 1 so w[0] acts as the bias term.
    X_train = np.hstack((np.ones((data_size, 1)), X_train))
    y_train = np.array(data.iloc[:, 4:5])
    order = np.random.permutation(data_size)
    return X_train[order], y_train[order], data_size


def get_test_set(path=TEST_PATH):
    """Load the test file (same layout as the training file, not shuffled).

    Args:
        path: Whitespace-separated text file with four features and a label
            per row.

    Returns:
        A tuple ``(X_val, y_val, test_size)`` with ``X_val`` of shape
        ``(test_size, 5)`` (leading column of ones) and ``y_val`` of shape
        ``(test_size, 1)``.
    """
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    test_set = np.array(data)
    test_size = test_set.shape[0]
    X_val = np.hstack((np.ones((test_size, 1)), test_set[:, 0:4]))
    y_val = test_set[:, 4:5]
    return X_val, y_val, test_size


def Pocket(updates=50, path=TRAIN_PATH):
    """Return the perceptron weights after a fixed number of updates.

    Despite the name, no pocket is kept in this variant: the weight vector
    reached after the last correction is returned as-is. The shuffled samples
    are scanned cyclically and every mistake moves ``w`` by ``0.5 * y * x``.
    Scanning continues over further passes until ``updates`` corrections were
    made or a full pass found no mistake.

    Args:
        updates: Number of corrections to perform.
        path: Training file handed to :func:`get_training_set`.

    Returns:
        The final weight vector, shape ``(5, 1)``.
    """
    X_train, y_train, _ = get_training_set(path)
    w = np.zeros((5, 1))
    count = 0
    while count < updates:
        corrected = False
        for x_i, y_i in zip(X_train, y_train[:, 0]):
            if y_i * np.dot(x_i, w[:, 0]) > 0:
                continue
            w[:, 0] += 0.5 * y_i * x_i
            count += 1
            corrected = True
            if count == updates:
                break
        if not corrected:
            # A clean pass means w already separates the data.
            break
    return w


def Pocket_Error(w_pocket, path=TEST_PATH):
    """Return the fraction of test samples misclassified by ``w_pocket``.

    Args:
        w_pocket: Weight vector with five entries.
        path: Test file handed to :func:`get_test_set`.

    Returns:
        Number of misclassified test rows divided by the test-set size.
    """
    X_val, y_val, test_size = get_test_set(path)
    # A score of exactly zero is counted as an error.
    scores = X_val @ np.reshape(w_pocket, (-1, 1))
    error_num = int(np.count_nonzero(scores * y_val <= 0))
    return error_num / test_size


if __name__ == '__main__':
    num_runs = 2000
    ratio_sum = 0
    for _ in range(num_runs):
        ratio_sum += Pocket_Error(Pocket())
    print(ratio_sum / num_runs)

运行结果为

20.

import pandas as pd
import numpy as np
import copy as cp

# Default data locations on the original author's machine.
TRAIN_PATH = 'F:\\Kaggle\\hw1_18_train.dat'
TEST_PATH = 'F:\\Kaggle\\hw1_18_test.dat'


def get_training_set(path=TRAIN_PATH):
    """Load the training file and return it in a freshly shuffled order.

    Args:
        path: Whitespace-separated text file; every row holds four feature
            values followed by the label.

    Returns:
        A tuple ``(X_train, y_train, data_size)`` where ``X_train`` has shape
        ``(data_size, 5)`` (a leading column of ones followed by the four
        features), ``y_train`` has shape ``(data_size, 1)`` and ``data_size``
        is the number of rows read. Rows of X and y are permuted together.
    """
    # Supplying `names` makes pandas treat the first row as data, not a header.
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    X_train = np.array(data.iloc[:, 0:4])
    data_size = X_train.shape[0]
    # Prepend the constant feature x0 = 1 so w[0] acts as the bias term.
    X_train = np.hstack((np.ones((data_size, 1)), X_train))
    y_train = np.array(data.iloc[:, 4:5])
    order = np.random.permutation(data_size)
    return X_train[order], y_train[order], data_size


def get_test_set(path=TEST_PATH):
    """Load the test file (same layout as the training file, not shuffled).

    Args:
        path: Whitespace-separated text file with four features and a label
            per row.

    Returns:
        A tuple ``(X_val, y_val, test_size)`` with ``X_val`` of shape
        ``(test_size, 5)`` (leading column of ones) and ``y_val`` of shape
        ``(test_size, 1)``.
    """
    data = pd.read_csv(path, sep=r'\s+', names=['a', 'b', 'c', 'd', 'y'])
    test_set = np.array(data)
    test_size = test_set.shape[0]
    X_val = np.hstack((np.ones((test_size, 1)), test_set[:, 0:4]))
    y_val = test_set[:, 4:5]
    return X_val, y_val, test_size


def _count_errors(X, y, w):
    """Count the rows of X that w misclassifies (a zero score counts as wrong)."""
    scores = X @ np.reshape(w, (-1, 1))
    return int(np.count_nonzero(scores * np.reshape(y, (-1, 1)) <= 0))


def Pocket(updates=100, path=TRAIN_PATH):
    """Run the pocket algorithm for a fixed number of weight updates.

    The shuffled training samples are scanned cyclically. Every mistake moves
    ``w`` by ``0.5 * y * x``; after each move the training error of the new
    ``w`` is measured and ``w`` is kept "in the pocket" when it is strictly
    better than the best seen so far. Scanning continues over further passes
    until ``updates`` corrections were made or a full pass found no mistake.

    Args:
        updates: Number of corrections to perform (100 for this question).
        path: Training file handed to :func:`get_training_set`.

    Returns:
        The pocket weight vector, shape ``(5, 1)``.
    """
    X_train, y_train, data_size = get_training_set(path)
    w = np.zeros((5, 1))
    w_pocket = np.zeros((5, 1))
    # Fewest training mistakes achieved by any w so far.
    best_errors = data_size
    count = 0
    while count < updates:
        corrected = False
        for x_i, y_i in zip(X_train, y_train[:, 0]):
            if y_i * np.dot(x_i, w[:, 0]) > 0:
                continue
            w[:, 0] += 0.5 * y_i * x_i
            count += 1
            corrected = True
            # Decide whether the updated w replaces the pocket.
            errors = _count_errors(X_train, y_train, w)
            if errors < best_errors:
                best_errors = errors
                w_pocket = cp.deepcopy(w)
            # Stop once the update budget is used up.
            if count == updates:
                break
        if not corrected:
            # A clean pass means w already separates the data.
            break
    return w_pocket


def Pocket_Error(w_pocket, path=TEST_PATH):
    """Return the fraction of test samples misclassified by ``w_pocket``.

    Args:
        w_pocket: Weight vector with five entries.
        path: Test file handed to :func:`get_test_set`.

    Returns:
        Number of misclassified test rows divided by the test-set size.
    """
    X_val, y_val, test_size = get_test_set(path)
    error_num = _count_errors(X_val, y_val, w_pocket)
    return error_num / test_size


if __name__ == '__main__':
    num_runs = 2000
    ratio_sum = 0
    for _ in range(num_runs):
        ratio_sum += Pocket_Error(Pocket())
    print(ratio_sum / num_runs)

把Pocket中的count判断值改为100即可,结果为

林轩田《机器学习基石》作业一-Python实现相关推荐

  1. 台湾大学林轩田机器学习基石课程学习笔记13 -- Hazard of Overfitting

    红色石头的个人网站:redstonewill.com 上节课我们主要介绍了非线性分类模型,通过非线性变换,将非线性模型映射到另一个空间,转换为线性模型,再来进行分类,分析了非线性变换可能会使计算复杂度 ...

  2. 林轩田机器学习基石(Machine Learning Foundations)笔记(一)

    要求两周之内看完林轩田机器学习基石和技法,速度是要的,但是感觉看得太快我会不久就会忘记.因此记录一下梗概,顺便写写看视频时解决的不懂的地方(有种思想就是把知识传播给更多人,有时候就是靠大佬们善意的一句 ...

  3. 台大林轩田·机器学习基石记要

    台大林轩田·机器学习基石记要 昨天开始看林轩田的机器学习基石,从今天起开始去粗取精 第一讲比较基础,一些概念自己早已经理解了,所以不再做笔记,有点印象的是讲到了ML.DL.AI的一些联系与区别,ML主 ...

  4. 台湾大学林轩田机器学习基石课程学习 笔记资料汇总

    笔记: 红色石头的专栏(写得非常好) 课后练习: 機器學習基石(Machine Learning Foundations) 机器学习基石 课后习题链接汇总 https://download.csdn. ...

  5. 机器学习系列笔记:林轩田机器学习基石——机器学习的可行性

    机器学习系列笔记:林轩田机器学习基石--机器学习的可行性

  6. 台湾大学林轩田机器学习基石课程学习笔记1 -- The Learning Problem

    红色石头的个人网站:redstonewill.com 最近在看NTU林轩田的<机器学习基石>课程,个人感觉讲的非常好.整个基石课程分成四个部分: When Can Machine Lear ...

  7. 台湾大学林轩田机器学习基石课程学习笔记15 -- Validation

    红色石头的个人网站:redstonewill.com 上节课我们主要讲了为了避免overfitting,可以使用regularization方法来解决.在之前的$E_{in}$上加上一个reg ...

  8. 台湾大学林轩田机器学习基石课程学习笔记14 -- Regularization

    红色石头的个人网站:redstonewill.com 上节课我们介绍了过拟合发生的原因:excessive power, stochastic/deterministic noise 和limited ...

  9. 台湾大学林轩田机器学习基石课程学习笔记12 -- Nonlinear Transformation

    红色石头的个人网站:redstonewill.com 上一节课,我们介绍了分类问题的三种线性模型,可以用来解决binary classification和multiclass classificati ...

  10. 台湾大学林轩田机器学习基石课程学习笔记11 -- Linear Models for Classification

    红色石头的个人网站:redstonewill.com 上一节课,我们介绍了Logistic Regression问题,建立cross-entropy error,并提出使用梯度下降算法gradient ...

最新文章

  1. 人脸识别方法个人见解
  2. 【NLP】业界总结 | BERT的花式玩法
  3. Linux下C程序的内存映像
  4. java final 修改_“无法改变的设计”——浅谈Java中的final关键字
  5. java务必让常量的值在运行期保持不变
  6. 编程让鼠标一直动_华硕、罗技、海盗船无线鼠标选哪个?
  7. carlife android 无线,carlife无线连接流程是什么
  8. java flask_将Java与Python Flask连接
  9. 计算机对身体有哪些危害,经常玩电脑的危害 经常玩电脑对身体有哪些伤害
  10. 我的朋友圈又被刷屏了。
  11. 数据AES加密安全传输之后台JAVA加密解密
  12. npm查找依赖包版本
  13. android http上传,Android HttpClient上传文件(亲测,成功)
  14. 常用的网络上的 webservice 地址
  15. 怎么把图片的边缘弄圆_如何PS制做出边缘清晰或虚化的圆角照片
  16. 海尔智家、海尔电器合体战美的、格力
  17. ​卧槽!这世上为什么要有乱码这个东西???
  18. 【应届生笔试资料分享二】中国农业银行相关介绍汇总
  19. CESM模式及其各个分量模式介绍
  20. ulpfec,flexFec,Pro-MPEG cop3协议对比

热门文章

  1. 使用RxJava的retryWhen操作符实现token过期自动刷新
  2. 教你如何写好一篇专利申请?
  3. 新Macbook电池续航能力表现欠佳,用户表示用不到5小时
  4. 计算机考研中的编号,考研科目前边的编号是什么意思
  5. 批量发送邮件(java开发、带图片和附件)
  6. 3DMax一个重要功能,通过它制作出来的影视作品有很强立体感
  7. 机器学习从入门到创业手记-2.算法与导师
  8. 搜狗输入法中文状态下,打出来还是英文按住键盘Ctrl+shift+E就可以了
  9. Srpingcloud之eureka,微服架构之注册中心eureka
  10. HMGK-being_hacked