深度学习(八)RBM受限波尔兹曼机学习-未完待续
RBM受限波尔兹曼机学习
原文地址:
作者:hjimce
[python] view plain copy
- #coding=utf-8
- import timeit
- try:
- import PIL.Image as Image
- except ImportError:
- import Image
- import numpy
- import theano
- import theano.tensor as T
- import os
- from theano.tensor.shared_randomstreams import RandomStreams
- from utils import tile_raster_images
- from logistic_sgd import load_data
- #RBM网络结构设置,主要是参数初始化
- class RBM(object):
- def __init__(self,input=None,n_visible=784,n_hidden=500,W=None,hbias=None,vbias=None,numpy_rng=None,theano_rng=None):
- self.n_visible = n_visible
- self.n_hidden = n_hidden
- if numpy_rng is None:
- numpy_rng = numpy.random.RandomState(1234)
- if theano_rng is None:
- theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
- #参数初始化公式 -4*sqrt(6./(n_visible+n_hidden))
- if W is None:
- initial_W = numpy.asarray(
- numpy_rng.uniform(
- low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
- high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
- size=(n_visible, n_hidden)
- ),
- dtype=theano.config.floatX
- )
- #在GPU上,多线程权重共享
- W = theano.shared(value=initial_W, name='W', borrow=True)
- #隐藏层偏置b初始化为0
- if hbias is None:
- hbias = theano.shared(value=numpy.zeros(n_hidden,dtype=theano.config.floatX),name='hbias',borrow=True)
- #可见层偏置项b初始化为0
- if vbias is None:
- vbias = theano.shared(value=numpy.zeros(n_visible,dtype=theano.config.floatX),name='vbias',borrow=True)
- #网络输入
- self.input = input
- if not input:
- self.input = T.matrix('input')
- self.W = W
- self.hbias = hbias
- self.vbias = vbias
- self.theano_rng = theano_rng
- #网络的所有参数,我们把它们放在一个列表中,以便后续访问
- self.params = [self.W, self.hbias, self.vbias]
- #能量函数的定义,主要是用于计算可视层的能量
- def free_energy(self, v_sample):
- ''''' Function to compute the free energy '''
- wx_b = T.dot(v_sample, self.W) + self.hbias
- vbias_term = T.dot(v_sample, self.vbias)
- hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
- return -hidden_term - vbias_term
- #前向传导 从可视层到隐藏层,计算p(h=1|v)
- def propup(self, vis):
- pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias
- return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
- #根据可视层状态 计算隐藏层状态
- def sample_h_given_v(self, v0_sample):
- #计算隐藏层每个神经元为状态1的概率,即 p(h=1|v)
- pre_sigmoid_h1, h1_mean = self.propup(v0_sample)
- #根据给定的概率,进行采样,就像抛硬币一样。n表示抛硬币的次数 ,每个神经元随机采样一次,就可以得到每个神经元的状态了
- #p是生成1的概率,binomial函数生成的是一个0、1数
- h1_sample = self.theano_rng.binomial(size=h1_mean.shape,n=1,p=h1_mean,dtype=theano.config.floatX)
- return [pre_sigmoid_h1, h1_mean, h1_sample]
- #后向传导,从隐藏层到可视层,计算p(v=1|h)
- def propdown(self, hid):
- pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias
- return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)]
- #根据隐藏层的状态计算可视层状态
- def sample_v_given_h(self, h0_sample):
- pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
- v1_sample = self.theano_rng.binomial(size=v1_mean.shape,
- n=1, p=v1_mean,
- dtype=theano.config.floatX)
- return [pre_sigmoid_v1, v1_mean, v1_sample]
- #计算从隐藏层-》可视层-》隐藏层的一个状态转移过程,相当于一次的Gibbs sampling采样
- def gibbs_hvh(self, h0_sample):
- pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
- pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
- return [pre_sigmoid_v1, v1_mean, v1_sample,
- pre_sigmoid_h1, h1_mean, h1_sample]
- #计算从可视层-》隐藏层-》可视层的状态转移过程,相当于一次的Gibbs sampling采样
- def gibbs_vhv(self, v0_sample):
- pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
- pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
- return [pre_sigmoid_h1, h1_mean, h1_sample,
- pre_sigmoid_v1, v1_mean, v1_sample]
- # k用于设置Gibbs sampling采样次数,也就是相当于来回跑了多少次(来回一趟算一次)
- def get_cost_updates(self, lr=0.1, persistent=None, k=1):
- # 当我们输入数据的时候,首先根据输入x,计算隐藏层的概率分布,概率分布的采样结果
- pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
- # decide how to initialize persistent chain:
- # for CD, we use the newly generate hidden sample
- # for PCD, we initialize from the old state of the chain
- if persistent is None:
- chain_start = ph_sample
- else:
- chain_start = persistent
- #让函数来回跑k次
- (
- [
- pre_sigmoid_nvs,
- nv_means,
- nv_samples,
- pre_sigmoid_nhs,
- nh_means,
- nh_samples
- ],
- updates
- ) = theano.scan(self.gibbs_hvh,outputs_info=[None, None, None, None, None, chain_start],n_steps=k)
- #拿到最后一次循环的状态
- chain_end = nv_samples[-1]
- #构造损失函数
- cost = T.mean(self.free_energy(self.input)) - T.mean(
- self.free_energy(chain_end))
- #计算梯度,然后进行梯度下降更新
- gparams = T.grad(cost, self.params, consider_constant=[chain_end])
- for gparam, param in zip(gparams, self.params):
- updates[param] = param - gparam * T.cast(lr,dtype=theano.config.floatX)
- if persistent:
- # Note that this works only if persistent is a shared variable
- updates[persistent] = nh_samples[-1]
- # pseudo-likelihood is a better proxy for PCD
- monitoring_cost = self.get_pseudo_likelihood_cost(updates)
- else:
- # reconstruction cross-entropy is a better proxy for CD
- monitoring_cost = self.get_reconstruction_cost(updates,pre_sigmoid_nvs[-1])
- return monitoring_cost, updates
- # end-snippet-4
- def get_pseudo_likelihood_cost(self, updates):
- """Stochastic approximation to the pseudo-likelihood"""
- # index of bit i in expression p(x_i | x_{\i})
- bit_i_idx = theano.shared(value=0, name='bit_i_idx')
- # binarize the input image by rounding to nearest integer
- xi = T.round(self.input)
- # calculate free energy for the given bit configuration
- fe_xi = self.free_energy(xi)
- # flip bit x_i of matrix xi and preserve all other bits x_{\i}
- # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
- # the result to xi_flip, instead of working in place on xi.
- xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])
- # calculate free energy with bit flipped
- fe_xi_flip = self.free_energy(xi_flip)
- # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
- cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip -
- fe_xi)))
- # increment bit_i_idx % number as part of updates
- updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
- return cost
- def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
- """Approximation to the reconstruction error
- Note that this function requires the pre-sigmoid activation as
- input. To understand why this is so you need to understand a
- bit about how Theano works. Whenever you compile a Theano
- function, the computational graph that you pass as input gets
- optimized for speed and stability. This is done by changing
- several parts of the subgraphs with others. One such
- optimization expresses terms of the form log(sigmoid(x)) in
- terms of softplus. We need this optimization for the
- cross-entropy since sigmoid of numbers larger than 30. (or
- even less then that) turn to 1. and numbers smaller than
- -30. turn to 0 which in terms will force theano to compute
- log(0) and therefore we will get either -inf or NaN as
- cost. If the value is expressed in terms of softplus we do not
- get this undesirable behaviour. This optimization usually
- works fine, but here we have a special case. The sigmoid is
- applied inside the scan op, while the log is
- outside. Therefore Theano will only see log(scan(..)) instead
- of log(sigmoid(..)) and will not apply the wanted
- optimization. We can not go and replace the sigmoid in scan
- with something else also, because this only needs to be done
- on the last step. Therefore the easiest and more efficient way
- is to get also the pre-sigmoid activation as an output of
- scan, and apply both the log and sigmoid outside scan such
- that Theano can catch and optimize the expression.
- """
- cross_entropy = T.mean(
- T.sum(
- self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
- (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
- axis=1
- )
- )
- return cross_entropy
- #
- def test_rbm(learning_rate=0.1, training_epochs=15,
- dataset='mnist.pkl.gz', batch_size=20,
- n_chains=20, n_samples=10, output_folder='rbm_plots',
- n_hidden=500):
- datasets = load_data(dataset)
- train_set_x, train_set_y = datasets[0]
- test_set_x, test_set_y = datasets[2]
- #计算训练的批数
- n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
- index = T.lscalar() # index to a [mini]batch
- x = T.matrix('x') # the data is presented as rasterized images
- rng = numpy.random.RandomState(123)
- theano_rng = RandomStreams(rng.randint(2 ** 30))
- persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),dtype=theano.config.floatX),borrow=True)
- #网络构建 ,可视层神经元个数为28*28,隐藏层神经元为500
- rbm = RBM(input=x, n_visible=28 * 28,n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)
- # get the cost and the gradient corresponding to one step of CD-15
- cost, updates = rbm.get_cost_updates(lr=learning_rate,persistent=persistent_chain, k=15)
- #################################
- # Training the RBM #
- #################################
- if not os.path.isdir(output_folder):
- os.makedirs(output_folder)
- os.chdir(output_folder)
- # start-snippet-5
- # it is ok for a theano function to have no output
- # the purpose of train_rbm is solely to update the RBM parameters
- train_rbm = theano.function([index],cost,updates=updates,givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]},name='train_rbm')
- plotting_time = 0.
- start_time = timeit.default_timer()
- # go through training epochs
- for epoch in xrange(training_epochs):
- # go through the training set
- mean_cost = []
- for batch_index in xrange(n_train_batches):
- mean_cost += [train_rbm(batch_index)]
- print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)
- # Plot filters after each training epoch
- plotting_start = timeit.default_timer()
- # Construct image from the weight matrix
- image = Image.fromarray(
- tile_raster_images(
- X=rbm.W.get_value(borrow=True).T,
- img_shape=(28, 28),
- tile_shape=(10, 10),
- tile_spacing=(1, 1)
- )
- )
- image.save('filters_at_epoch_%i.png' % epoch)
- plotting_stop = timeit.default_timer()
- plotting_time += (plotting_stop - plotting_start)
- end_time = timeit.default_timer()
- pretraining_time = (end_time - start_time) - plotting_time
- print ('Training took %f minutes' % (pretraining_time / 60.))
- # end-snippet-5 start-snippet-6
- #################################
- # Sampling from the RBM #
- #################################
- # find out the number of test samples
- number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
- # pick random test examples, with which to initialize the persistent chain
- test_idx = rng.randint(number_of_test_samples - n_chains)
- persistent_vis_chain = theano.shared(
- numpy.asarray(
- test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
- dtype=theano.config.floatX
- )
- )
- # end-snippet-6 start-snippet-7
- plot_every = 1000
- # define one step of Gibbs sampling (mf = mean-field) define a
- # function that does `plot_every` steps before returning the
- # sample for plotting
- (
- [
- presig_hids,
- hid_mfs,
- hid_samples,
- presig_vis,
- vis_mfs,
- vis_samples
- ],
- updates
- ) = theano.scan(
- rbm.gibbs_vhv,
- outputs_info=[None, None, None, None, None, persistent_vis_chain],
- n_steps=plot_every
- )
- # add to updates the shared variable that takes care of our persistent
- # chain :.
- updates.update({persistent_vis_chain: vis_samples[-1]})
- # construct the function that implements our persistent chain.
- # we generate the "mean field" activations for plotting and the actual
- # samples for reinitializing the state of our persistent chain
- sample_fn = theano.function(
- [],
- [
- vis_mfs[-1],
- vis_samples[-1]
- ],
- updates=updates,
- name='sample_fn'
- )
- # create a space to store the image for plotting ( we need to leave
- # room for the tile_spacing as well)
- image_data = numpy.zeros(
- (29 * n_samples + 1, 29 * n_chains - 1),
- dtype='uint8'
- )
- for idx in xrange(n_samples):
- # generate `plot_every` intermediate samples that we discard,
- # because successive samples in the chain are too correlated
- vis_mf, vis_sample = sample_fn()
- print ' ... plotting sample ', idx
- image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
- X=vis_mf,
- img_shape=(28, 28),
- tile_shape=(1, n_chains),
- tile_spacing=(1, 1)
- )
- # construct image
- image = Image.fromarray(image_data)
- image.save('samples.png')
- # end-snippet-7
- os.chdir('../')
- if __name__ == '__main__':
- test_rbm()
深度学习(八)RBM受限波尔兹曼机学习-未完待续相关推荐
- Windows x64内核学习笔记(五)—— KPTI(未完待续)
Windows x64内核学习笔记(五)-- KPTI(未完待续) KPTI 实验一:构造IDT后门并读取Cr3 参考资料 KPTI 描述:KPTI(Kernel page-table isolati ...
- DL:受限波尔兹曼机(RBM)能量模型
受限波尔兹曼机(RBM)能量模型 在学习Hinton的stack autoencoder算法(论文 Reducing the Dimensionality of Data with Neural Ne ...
- 神经网络 | 受限波尔兹曼机(附源代码)
===================================================== github:https://github.com/MichaelBeechan CSDN: ...
- pythonb超分辨成像_Papers | 超分辨 + 深度学习(未完待续)
1. SRCNN 1.1. Contribution end-to-end深度学习应用在超分辨领域的开山之作(非 end-to-end 见 Story.3 ). 指出了超分辨方向上传统方法( spar ...
- 二叉树学习笔记(未完待续)
摘要 二叉树学习笔记(未完待续). 博客 IT老兵驿站. 前言 昨天(2019-11-07)复习红黑树,发现红黑树和二叉树密不可分,所以这里再复习一下二叉树. 在大学的时候,这块我很认真地学习了一遍. ...
- 学习前端的实用网站——未完待续
学习前端的实用网站--未完待续 一.学习类网站 1.视频教程 2.技术分享 3.大牛博客 4.参考手册 二.功能类网站 1.颜色 2.图标 3.字体 4.素材 三.辅助类网站 一.学习类网站 1.视频 ...
- 多标签学习之讲座版 (内部讨论, 未完待续)
摘要: 多标签学习是一种常见的, 而并非小众的机器学习问题. 本贴为专题讲座准备. 1. 基本数据模型 定义1. 多标签数据为一个二元组: S=(X,Y),(1)S = (\mathbf{X}, \m ...
- 【未完待续】综述:用于视频分割(Video Segmentation)的深度学习
A Survey on Deep Learning Technique for Video Segmentation 0. 摘要 本文回顾视频分割的两条基本研究路线:视频目标分割(object seg ...
- 【深度学习】波尔次曼机,受限波尔兹曼机,DBN详解
神经网络自20世纪50年代发展起来后,因其良好的非线性能力.泛化能力而备受关注.然而,传统的神经网络仍存在一些局限,在上个世纪90年代陷入衰落,主要有以下几个原因: 1.传统的神经网络一般都是单隐层, ...
最新文章
- 网络高可用性解决方案
- 一道题,最小操作次数使数组元素相等引发的思考
- 有线节点与无线节点的混合仿真模拟实验
- 前端学习(1949)vue之电商管理系统电商系统之实现分页
- lambda写法(多参数)
- jsonArray与 jsonObject区别与js取值
- php tp框架分页源代码,ThinkPHP3.2框架自带分页功能实现方法示例
- java simplejson_JSON.simple首页、文档和下载 - JSON/BSON开发包 - OSCHINA - 中文开源技术交流社区...
- 转载:SQL server2005 里面没有management studio!下载SQL开发版本
- tomcat组播实现session一致性_java会话技术-Session
- 汇编:在BUFFER中定义了的十个带符号字,将其中的负数变成绝对值,并以十进制方式输出
- 如何评估一个ECG分析算法或设备
- 淘宝/天猫添加收货地址 API
- [软件更新]LeadBBS 6.0正式发布(更新日志和下载)
- 华硕笔记本进bios按哪个键 华硕手提电脑怎么进bios设置
- sql 自定义排序 顺序
- mysql的填充因子_sql server填充因子
- iOS 通讯录-获取联系人属性
- 告诉你,如何成就DBA职业生涯
- Semantic Structure-BasedWord Embedding by Incorporating Concept Convergence and Word Divergence
热门文章
- 实战SSM_O2O商铺_44【DES加密】 关键配置信息进行DES加密
- 实战SSM_O2O商铺_26【商品类别】批量新增商品类别从Dao到View层的开发
- TCP/IP 协议标准简单描述
- 腾讯面试题 Prometheus-PQL
- ar 微信小程序_小程序可以实现AR效果了 微信还为开发者提供了基础能力支持
- 学习Kotlin(一)为什么使用Kotlin
- c 获取char*的长度_最大的 String 字符长度是多少?
- 微信小程序勾选协议与提交按钮联动
- 电磁悬浮控制系统仿真设计
- pip和conda批量导出、安装第三方依赖库(requirements.txt)