Source: GitHub

This codebase is a work in progress that provides code for optimizing neural networks with Markov chain Monte Carlo (MCMC).

python run_conv.py --config_file configs/config_mcmc_conv_real.json --data_folder /home/paul/data/pytorch_cifar10/ --verbose

Code walkthrough

1. run_conv.py

1. Importing the libraries

import time, os, datetime
import math
import json
import argparse

import torch
import numpy as np
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

from deep_learning_mcmc import nets, optimizers, stats, selector

2. Parsing the command-line arguments

parser = argparse.ArgumentParser(description='Train a model on cifar10 with either mcmc or stochastic gradient based approach')
parser.add_argument('--data_folder',help='absolute path toward the data folder which contains the cifar10 dataset. Pytorch will download it if it does not exist',required=True, type=str)
parser.add_argument('--config_file',help='json file containing various setups (learning rate, mcmc iterations, variance of the priors and the proposal, batch size,...)',default=None, type=str)
parser.add_argument('--measure_power',help='if set, will record the power draw. This requires the deep_learning_measure package.',action='store_true')
parser.add_argument('--verbose',help='if set, will print the details of each mcmc iteration.',action='store_true')
args = parser.parse_args()
params = vars(args)
json_params = json.load(open(params['config_file']))
for k, v in json_params.items():
    params[k] = v
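For clarity, a minimal sketch (with hypothetical values) of the merge semantics: every key in the JSON config is copied into params, overwriting any CLI value of the same name.

params = {'verbose': True, 'batch_size': None}       # from the CLI
json_params = {'batch_size': 50000, 'epochs': 1000}  # from the config file
for k, v in json_params.items():
    params[k] = v
assert params == {'verbose': True, 'batch_size': 50000, 'epochs': 1000}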

3. Loading the data

dataset = params['dataset']  # 'CIFAR' in our config, so the else branch runs
if dataset == 'MNIST':
    print('MNIST DATASET')
    channels = 1
    transform = transforms.Compose([transforms.ToTensor()])
    training_data = MNIST(root=args.data_folder, train=True, download=True, transform=transform)
    test_data = MNIST(root=args.data_folder, train=False, download=True, transform=transform)
else:
    print('CIFAR10 DATASET')
    channels = 3
    training_data = datasets.CIFAR10(root=args.data_folder, train=True, download=True, transform=ToTensor())
    test_data = datasets.CIFAR10(root=args.data_folder, train=False, download=True, transform=ToTensor())
examples = enumerate(training_data)
batch_idx, (ex_train_data, example_targets) = next(examples)
examples = enumerate(test_data)
batch_idx, (ex_test_data, example_targets) = next(examples)
print('Image input size',ex_train_data.shape)
# Image input size torch.Size([3, 32, 32])
img_size = ex_train_data.shape[1]
# build the dataloaders; batch_size comes from the config (50000, i.e. the full training set)
batch_size = params['batch_size']
train_dataloader = DataLoader(training_data, batch_size=batch_size, num_workers=0)
test_dataloader = DataLoader(test_data, batch_size=batch_size, num_workers=0)
# Training size (50000, 32, 32, 3)
# 10 classes
output_size = len(training_data.classes)
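As a quick sanity check (assuming the config's batch_size of 50000), the dataloader yields the whole CIFAR10 training set as a single batch per epoch:

# one full-batch iteration over the training dataloader
for X, y in train_dataloader:
    print(X.shape, y.shape)  # torch.Size([50000, 3, 32, 32]) torch.Size([50000])
    break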

4. Displaying the parameters

print('Experience config --')
print(params)
Output:

Experience config --
{'data_folder': '/home/paul/data/pytorch_cifar10/', 'config_file': 'configs/config_mcmc_conv_real.json', 'measure_power': 0, 'verbose': True, 'batch_size': 50000, 'epochs': 1000, 'exp_name': 'mozer', 'architecture': {'boolean_flags': [0, 0], 'activations': ['ReLU', 'Softmax'], 'nb_filters': 64}, 'variance_init': 1e-08, 'optimizer': {'name': 'mcmc', 'pruning_level': 0, 'selector': {'name': 'Selector', 'layer_conf': [{'layer_distr': 0.5, 'get_idx': 'get_idces_filter_conv'}, {'layer_distr': 0.5, 'get_idx': 'get_idces_uniform_linear', 'get_idx_param': 363}]}, 'samplers': [{'sampler': {'name': 'Student', 'variance': 1e-07}, 'prior': {'name': 'Student', 'variance': 0.001}, 'lamb': 100000}, {'sampler': {'name': 'Student', 'variance': 1e-07}, 'prior': {'name': 'Student', 'variance': 0.001}, 'lamb': 100000}], 'iter_mcmc': 200}, 'dataset': 'CIFAR'}

5. Architecture parameters (from config_file = config_mcmc_conv_real.json)

if "nb_filters" not in params["architecture"]:layer_sizes = [input_size, output_size]
else:layer_sizes = [input_size, params["architecture"]['nb_filters'], output_size]if "boolean_flags" in params["architecture"]:boolean_flags = [bool(b) for b in params['architecture']['boolean_flags']]
else:boolean_flags = [False for _ in layer_sizes[1:]]
if "activations" not in params["architecture"]:activations=None
else:activations = params["architecture"]["activations"]

where config_mcmc_conv_real.json is:

{
  "batch_size": 50000,
  "epochs": 1000,
  "exp_name": "mozer",
  "architecture": {"boolean_flags": [0, 0], "activations": ["ReLU", "Softmax"], "nb_filters": 64},
  "variance_init": 0.00000001,
  "optimizer": {
    "name": "mcmc",
    "pruning_level": 0,
    "selector": {"name": "Selector", "layer_conf": [
      {"layer_distr": 0.5, "get_idx": "get_idces_filter_conv"},
      {"layer_distr": 0.5, "get_idx": "get_idces_uniform_linear", "get_idx_param": 363}]},
    "samplers": [
      {"sampler": {"name": "Student", "variance": 0.0000001}, "prior": {"name": "Student", "variance": 0.001}, "lamb": 100000},
      {"sampler": {"name": "Student", "variance": 0.0000001}, "prior": {"name": "Student", "variance": 0.001}, "lamb": 100000}],
    "iter_mcmc": 200
  },
  "dataset": "CIFAR",
  "measure_power": 0
}

6. Choosing the optimizer

# whether the gradient optimizer is used
use_gradient = params['optimizer']["name"] == 'grad'
# setting the optimizer
if params["optimizer"]["name"] == "grad":
    if 'pruning_level' in params["optimizer"]:
        optimizer = optimizers.GradientOptimizer(lr=params["optimizer"]['lr'], pruning_level=params["optimizer"]['pruning_level'])
    else:
        optimizer = optimizers.GradientOptimizer(lr=params["optimizer"]['lr'])
elif params["optimizer"]["name"] == "binaryConnect":
    optimizer = optimizers.BinaryConnectOptimizer(lr=params["optimizer"]['lr'])
else:
    # our config sets "name": "mcmc", so execution enters this branch
    config = {'name': params['optimizer']['selector']['name'], 'layer_conf': []}
    for layer_conf in params['optimizer']['selector']['layer_conf']:
        layer_distr = layer_conf['layer_distr']
        if 'get_idx_param' in layer_conf:
            get_idx = getattr(selector, layer_conf['get_idx'])(layer_conf['get_idx_param'])
        else:
            get_idx = getattr(selector, layer_conf['get_idx'])()
        config['layer_conf'].append({'layer_distr': layer_distr, 'get_idx': get_idx})
    selector = selector.build_selector(config)  # note: rebinds the imported module name
    samplers = stats.build_samplers(params["optimizer"]["samplers"])
    if 'pruning_level' in params["optimizer"]:
        optimizer = optimizers.MCMCOptimizer(samplers, iter_mcmc=params["optimizer"]["iter_mcmc"], prior=samplers, selector=selector, pruning_level=params["optimizer"]['pruning_level'])
    else:
        optimizer = optimizers.MCMCOptimizer(samplers, iter_mcmc=params["optimizer"]["iter_mcmc"], prior=samplers, selector=selector)
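For the JSON above, the selector-building loop produces a config equivalent to the following (an illustrative expansion, not code from the repo; it uses the repo's selector module as imported at the top, before the name is rebound). The proposal touches either one whole conv filter or 363 linear weights, each with probability 0.5:

config = {
    'name': 'Selector',
    'layer_conf': [
        # conv layer: chosen with probability 0.5, proposal moves one filter
        {'layer_distr': 0.5, 'get_idx': getattr(selector, 'get_idces_filter_conv')()},
        # linear layer: chosen with probability 0.5, proposal moves 363 weights
        {'layer_distr': 0.5, 'get_idx': getattr(selector, 'get_idces_uniform_linear')(363)},
    ],
}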

7. Other settings

1. Device: GPU if available, otherwise CPU
2. Number of epochs: taken from the config_file
3. Loss function: CrossEntropyLoss()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
epochs = params['epochs']
loss_fn = torch.nn.CrossEntropyLoss()
results = {}

Parameter initialization and model instantiation

init_sampler = stats.Student(params['variance_init'])
if "variance_init" in params:
    st_init = stats.Student(params['variance_init'])
    if 'pruning_level' in params["optimizer"]:
        # our config takes this branch
        model = nets.ConvNet(params['architecture']['nb_filters'], channels, init_sampler, binary_flags=boolean_flags, activations=activations, init_sparse=st_init, pruning_level=params["optimizer"]['pruning_level'])
    else:
        model = nets.ConvNet(params['architecture']['nb_filters'], channels, init_sampler, binary_flags=boolean_flags, activations=activations, init_sparse=st_init)
else:
    if 'pruning_level' in params["optimizer"]:
        model = nets.ConvNet(params['architecture']['nb_filters'], channels, init_sampler, binary_flags=boolean_flags, activations=activations, pruning_level=params["optimizer"]['pruning_level'])
    else:
        model = nets.ConvNet(params['architecture']['nb_filters'], channels, init_sampler, binary_flags=boolean_flags, activations=activations)
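The constants 23232 and 40960 that the pruning printouts divide by later are consistent with this architecture, assuming 64 filters of size 3×11×11 and a fully connected layer mapping a 64×8×8 feature map to the 10 classes (an inference from the numbers, not something stated in the source):

# Hedged arithmetic: parameter counts matching the denominators used in
# train_1_batch's pruning printouts. The 11x11 filters and 8x8 feature map
# are inferred, not stated in the walkthrough.
nb_filters = 64
conv_weights = nb_filters * 3 * 11 * 11   # 23232; one filter holds 363 weights,
                                          # the same size as get_idx_param = 363
fc_weights = 10 * (nb_filters * 8 * 8)    # 40960
print(conv_weights, fc_weights)           # 23232 40960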

# exp_name = mozer
exp_name = params['exp_name']

Other bookkeeping

model = model.to(device)
training_time = 0
eval_time = 0
# overall wall-clock timer
start_all = time.time()
# running count of weight updates
previous_w_updated = 0

2. Running the optimization

Note: all the code in this section lives inside the following for loop.

for t in range(epochs):

2.1 Timestamps

start_epoch = time.time()
print(f"Epoch {t+1} is running\n--------------------- duration = " + time.strftime("%H:%M:%S", time.gmtime(time.time() - start_all)))

2.2 The optimization step

if use_gradient:
    optimizer.train_1_epoch(train_dataloader, model, loss_fn)
else:
    acceptance_ratio = optimizer.train_1_epoch(train_dataloader, model, loss_fn, verbose=params['verbose'])

The optimizer.train_1_epoch method is shown below.

2.3 The optimizer

The MCMCOptimizer class:

class MCMCOptimizer(Optimizer):

The __init__ method:

def __init__(self, sampler, data_points_max=1000000000, iter_mcmc=1, lamb=1000, prior=None, selector=None, pruning_level=0):
    """
    variance_prop : zero centered univariate student law class to generate the proposals
    variance_prior : zero centered univariate student law class used as a prior on the parameter values
    lamb : ponderation between the data and the prior
    iter_mcmc : number of mcmc iterations
    """
    super().__init__(data_points_max=1000000000)
    self.iter_mcmc = iter_mcmc
    self.lamb = lamb
    self.sampler = sampler
    self.pruning_level = pruning_level
    if prior is None:
        self.prior = self.sampler
    else:
        self.prior = prior
    self.selector = selector

The training methods:

def train_1_epoch(self, dataloader, model, loss_fn, verbose=False):
    """train for 1 epoch and collect the acceptance ratio"""
    num_items_read = 0
    acceptance_ratio = Acceptance_ratio()
    device = next(model.parameters()).device
    for _, (X, y) in enumerate(dataloader):
        if self.data_points_max <= num_items_read:
            break
        X = X[:min(self.data_points_max - num_items_read, X.shape[0])]
        y = y[:min(self.data_points_max - num_items_read, X.shape[0])]
        num_items_read = min(self.data_points_max, num_items_read + X.shape[0])
        X = X.to(device)
        y = y.to(device)
        acceptance_ratio += self.train_1_batch(X, y, model, dataloader, loss_fn=torch.nn.CrossEntropyLoss(), verbose=verbose)
    return acceptance_ratio

def train_1_batch(self, X, y, model, dataloader, loss_fn, verbose=False):
    """
    perform mcmc iterations with a neighborhood corresponding to one line of the parameters.
    the acceptance of the proposal depends on the following criterion:
        exp(lamb * (loss_previous - loss_prop)) * stud(params_prop) / stud(params_previous)
    inputs:
        X : input data
        y : input labels
        model : neural net we want to optimize
        loss_fn : loss function
    outputs:
        acceptance_ratio
        model : optimised model (modified by reference)
    """
    device = next(model.parameters()).device
    ar = Acceptance_ratio()
    pred = model(X)
    loss = loss_fn(pred, y).item()
    if self.pruning_level > 0:
        Pruner = pruning.MCMCPruner()
        relevance_dict_conv_layer = {}
        for cle in range(model.conv1.weight.data.shape[0]):
            relevance_dict_conv_layer[cle] = 0
        relevance_dict_linear_layer_w = torch.zeros(model.fc1.weight.data.shape)
        relevance_dict_linear_layer_b = torch.zeros(model.fc1.bias.data.shape[0], 1)
        relevance_dict_linear_layer = {'weight': relevance_dict_linear_layer_w, 'bias': relevance_dict_linear_layer_b}
    for i in tqdm(range(self.iter_mcmc), position=0):
        if i > 0 and self.pruning_level > 0 and i % 200 == 0:
            # skeletonize every 200 mcmc iterations
            print('Pruning level for conv layer', 1 - torch.count_nonzero(model.conv1.weight.data).item() / 23232)
            print('Pruning level for FC layer =', 1 - torch.count_nonzero(model.fc1.weight.data).item() / 40960)
            print('Skeletonization...')
            Pruner.skeletonize_conv(model, self.pruning_level, relevance_dict_conv_layer)
            Pruner.skeletonize_fc(model, self.pruning_level, relevance_dict_linear_layer)
            print('Pruning level for conv layer', 1 - torch.count_nonzero(model.conv1.weight.data).item() / 23232)
            print('Pruning level for FC layer =', 1 - torch.count_nonzero(model.fc1.weight.data).item() / 40960)
            loss = loss_fn(model(X), y)  # update loss for a faithful likelihood ratio
        # selecting a layer and a neighborhood at random
        layer_idx, idces = self.selector.get_neighborhood(model)
        neighborhood = layer_idx, idces
        params_line = self.selector.getParamLine(neighborhood, model)
        epsilon = self.sampler.sample(self.selector.neighborhood_info)
        if epsilon is not None:
            epsilon = torch.tensor(epsilon.astype('float32')).to(device)
        # getting the ratio of the students
        student_ratio = self.prior.get_ratio(epsilon, params_line, self.selector.neighborhood_info)
        # applying the changes to get the new value of the loss
        self.selector.update(model, neighborhood, epsilon)
        pred = model(X)
        loss_prop = loss_fn(pred, y)
        # computing the change in the loss
        lamb = self.sampler.get_lambda(self.selector.neighborhood_info)
        data_term = torch.exp(lamb * (loss - loss_prop))
        rho = min(1, data_term * student_ratio)
        key = self.selector.get_proposal_as_string(neighborhood)
        ar.incr_prop_count(key)  # recording so that we can later compute the acceptance ratio
        if rho > torch.rand(1).to(device):
            # accepting, keeping the new value of the loss
            ar.incr_acc_count(key)
            loss = loss_prop
            if layer_idx == 0 and self.pruning_level > 0:
                relevance_dict_conv_layer[int(idces[0][0][0])] += 1
            if layer_idx == 1 and self.pruning_level > 0:
                relevance_dict_linear_layer['weight'][idces[0][:, 0], idces[0][:, 1]] += 1
                relevance_dict_linear_layer['bias'][idces[1]] += 1
        else:
            # not accepting, so undoing the change
            self.selector.undo(model, neighborhood, epsilon)
    return ar
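Stripped of the repo-specific plumbing, the accept/reject rule is a standard Metropolis-Hastings step. A self-contained sketch with hypothetical numbers (not the repo's API):

import math
import random

def mh_accept(loss_prev, loss_prop, prior_ratio, lamb):
    # acceptance probability: min(1, exp(lamb*(loss_prev - loss_prop)) * prior_ratio)
    data_term = math.exp(min(lamb * (loss_prev - loss_prop), 700.0))  # clamp to avoid overflow
    rho = min(1.0, data_term * prior_ratio)
    return random.random() < rho

# a proposal that lowers the loss is (all but) always accepted at lamb = 1e5
print(mh_accept(2.30, 2.29, prior_ratio=1.0, lamb=1e5))  # True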

The self.prior.get_ratio(epsilon, params_line, self.selector.neighborhood_info) call above is defined as follows:

def get_ratio(self, epsilon, params, neighborhood_info):
    """
    compute the prior ratio for the selected layer
    epsilon : delta drawn from the sampler
    params : parameter weights of the model
    neighborhood_info : information about the current network layer
    """
    layer_idx, _ = neighborhood_info
    return self.samplers[layer_idx]['prior'].get_ratio(epsilon, params)

The inner .get_ratio called above is:

def get_ratio(self, epsilon, params):
    """
    compute the likelihood ratio of the two parameter values:
        Prod_i ( student(params[i] + epsilon[i]) / student(params[i]) )
    """
    # apply the move to get theta tilde
    params_tilde = params + epsilon
    # get the likelihood of theta
    den = self.t_distribution_fast(params)
    # get the likelihood of theta tilde
    num = self.t_distribution_fast(params_tilde)
    ratio = num / den
    return functools.reduce(mul, ratio, 1)
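t_distribution_fast is the repo's own density; the same quantity can be sketched with scipy's Student-t pdf as a stand-in (df and the example values are hypothetical):

import numpy as np
from scipy.stats import t as student_t

def student_ratio(params, epsilon, df=1.0, scale=1e-3):
    """Prod_i student(params[i] + epsilon[i]) / student(params[i]),
    using scipy's zero-centered Student-t pdf instead of t_distribution_fast."""
    num = student_t.pdf(params + epsilon, df=df, scale=scale)
    den = student_t.pdf(params, df=df, scale=scale)
    return np.prod(num / den)

params = np.array([0.01, -0.02, 0.005])
epsilon = np.array([1e-4, -1e-4, 2e-4])
print(student_ratio(params, epsilon))  # close to 1 for a small move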

2.4 Logging

  • batches
  • time
  • backward passes
  • loss
  • accuracy
  • acceptance ratio

result = {"epoch": t}
end_epoch = time.time()
training_time += time.time() - start_epoch
result['training_time'] = time.time() - start_epoch
result['end_training_epoch'] = datetime.datetime.now().__str__()
loss, accuracy = nets.evaluate(train_dataloader, model, loss_fn)
if use_gradient:
    result['iterations'] = (t+1) * int(50000 / batch_size)
    result['passforwards'] = (t+1) * 50000
    result['backwards'] = (t+1) * 50000
    result['weights_updated'] = (t+1) * int(50000 / batch_size) * 64266
    print(f"Training Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {loss:>8f} \n")
else:
    result['iterations'] = (t+1) * params["optimizer"]["iter_mcmc"] * int(50000 / batch_size)
    result['passforwards'] = (t+1) * params["optimizer"]["iter_mcmc"] * int(50000 / batch_size)
    result['backwards'] = 0
    print(f"Training Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {loss:>8f} \n")
    print("Acceptance ratio", acceptance_ratio)
if not use_gradient:
    result['accept_ratio'] = acceptance_ratio.to_dict()
    acc_0 = acceptance_ratio.to_dict()["layer_0"]
    acc_1 = acceptance_ratio.to_dict()["layer_1"]
    if 'get_idx_param' in params['optimizer']['selector']['layer_conf'][1]:
        # layer_conf here is the leftover loop variable from the selector-building loop
        result['weights_updated'] = previous_w_updated + int(50000 / batch_size) * params["optimizer"]["iter_mcmc"] * (0.5 * 363 + 0.5 * layer_conf['get_idx_param'])
        previous_w_updated = result['weights_updated']
result['train_loss'] = loss
result['train_accuracy'] = accuracy
loss, accuracy = nets.evaluate(test_dataloader, model, loss_fn)
print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {loss:>8f} \n")
result['test_loss'] = loss
result['test_accuracy'] = accuracy
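Plugging in the values from config_mcmc_conv_real.json (one full batch per epoch, iter_mcmc = 200, and both the hardcoded conv move and get_idx_param equal to 363), the bookkeeping adds per epoch:

# worked arithmetic for the weights_updated increment above
batch_size = 50000
iter_mcmc = 200
get_idx_param = 363  # weights moved per linear-layer proposal
per_epoch = int(50000 / batch_size) * iter_mcmc * (0.5 * 363 + 0.5 * get_idx_param)
print(per_epoch)  # 72600.0 weights updated (in expectation) per epoch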

3. Miscellaneous

for i in range(9):
    proba = 0.1 + i * 0.1
    loss_sparse, accuracy_sparse, kept = nets.evaluate_sparse(test_dataloader, model, loss_fn, proba, boolean_flags)
    if i == 0:
        result['sparse test'] = [{'test loss sparse': loss_sparse, 'testing accuracy sparse': accuracy_sparse, 'l0 norm': kept}]
    else:
        result['sparse test'].append({'test loss sparse': loss_sparse, 'testing accuracy sparse': accuracy_sparse, 'l0 norm': kept})
for i in range(9):
    proba = 0.91 + i * 0.01
    loss_sparse, accuracy_sparse, kept = nets.evaluate_sparse(test_dataloader, model, loss_fn, proba, boolean_flags)
    result['sparse test'].append({'test loss sparse': loss_sparse, 'testing accuracy sparse': accuracy_sparse, 'l0 norm': kept})
if int(math.log(t+1, 10)) == math.log(t+1, 10):
    # checkpoint when t+1 is a power of ten
    torch.save(model, exp_name + str(t+1) + '.th')
result['eval_time'] = time.time() - end_epoch
eval_time += time.time() - end_epoch
result['end_eval'] = datetime.datetime.now().__str__()
results[t] = result
json.dump(results, open(exp_name + '.json', 'w'))

# after the epoch loop:
if params['measure_power']:
    q.put(experiment.STOP_MESSAGE)
    print("power measuring stopped")
    driver = parsers.JsonParser("power_measure")
    exp_result = experiment.ExpResults(driver)
    exp_result.print()
print(exp_name + '.json generated')
print('Report is written at ' + str(exp_name) + '.csv')
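The power-of-ten checkpoint test deserves a caveat: it relies on math.log(t+1, 10) being exact. A quick check:

import math
# epochs that trigger a checkpoint during a 1000-epoch run
saves = [t + 1 for t in range(1000) if int(math.log(t + 1, 10)) == math.log(t + 1, 10)]
print(saves)  # [1, 10, 100] or [1, 10, 100, 1000], depending on floating-point
              # rounding of math.log(1000, 10); math.log10 would be the robust choice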
