DROO memory.py

这个是论文《Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks》的tf1.x版本代码，这部分是DNN网络的结构。
为方便自己看代码所以放上来，完整代码在文章作者的仓库：https://github.com/revenol/DROO。

#  #################################################################
#  This file contains memory operation including encoding and decoding operations.
#
# version 1.0 -- January 2018. Written by Liang Huang (lianghuang AT zjut.edu.cn)
#  #################################################################from __future__ import print_function
import tensorflow as tf
import numpy as np# DNN network for memory
class MemoryDNN:
    """Replay memory plus a small fully connected network (TensorFlow 1.x).

    The network maps an input vector ``h`` to one logit per output entry.
    Pairs ``(h, m)`` are stored in a fixed-size ring buffer and the network
    is trained on random batches from that buffer with a sigmoid
    cross-entropy loss.

    Args:
        net: Four layer sizes ``[n_input, n_hidden_1, n_hidden_2, n_output]``.
        learning_rate: Learning rate passed to the Adam optimizer.
        training_interval: ``encode`` triggers ``learn`` whenever
            ``memory_counter`` is a multiple of this value.
        batch_size: Number of rows sampled (with replacement) per training step.
        memory_size: Number of rows in the ring buffer.
        output_graph: When true, write the graph to ``logs/`` for TensorBoard.
    """

    def __init__(
        self,
        net,
        learning_rate=0.01,
        training_interval=10,
        batch_size=100,
        memory_size=1000,
        output_graph=False,
    ):
        # Compare by value: ``is`` on an int literal relies on interning.
        assert len(net) == 4, "net must list exactly 4 layer sizes"

        self.net = net
        self.training_interval = training_interval  # learn every N entries
        self.lr = learning_rate
        self.batch_size = batch_size
        self.memory_size = memory_size

        # Lazily filled by knn() with every binary action vector.
        self.enumerate_actions = []

        # Starts at 1, so the first entry is written to slot 1 and slot 0
        # stays empty until the buffer wraps around.
        self.memory_counter = 1

        # Training loss recorded after every learn() call.
        self.cost_his = []

        tf.reset_default_graph()

        # One row per entry: input part followed by the target part.
        self.memory = np.zeros((self.memory_size, self.net[0] + self.net[-1]))

        self._build_net()
        self.sess = tf.Session()

        if output_graph:
            # View with: $ tensorboard --logdir=logs
            tf.summary.FileWriter("logs/", self.sess.graph)

        self.sess.run(tf.global_variables_initializer())

    def _build_net(self):
        """Create placeholders, the three-layer network, loss and train op."""

        def build_layers(h, c_names, net, w_initializer, b_initializer):
            # Two ReLU layers followed by a linear layer that emits logits.
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [net[0], net[1]], initializer=w_initializer, collections=c_names)
                b1 = tf.get_variable('b1', [1, net[1]], initializer=b_initializer, collections=c_names)
                l1 = tf.nn.relu(tf.matmul(h, w1) + b1)

            with tf.variable_scope('l2'):
                w2 = tf.get_variable('w2', [net[1], net[2]], initializer=w_initializer, collections=c_names)
                b2 = tf.get_variable('b2', [1, net[2]], initializer=b_initializer, collections=c_names)
                l2 = tf.nn.relu(tf.matmul(l1, w2) + b2)

            with tf.variable_scope('M'):
                w3 = tf.get_variable('w3', [net[2], net[3]], initializer=w_initializer, collections=c_names)
                b3 = tf.get_variable('b3', [1, net[3]], initializer=b_initializer, collections=c_names)
                out = tf.matmul(l2, w3) + b3

            return out

        # ------------------ build memory_net ------------------
        self.h = tf.placeholder(tf.float32, [None, self.net[0]], name='h')  # input
        self.m = tf.placeholder(tf.float32, [None, self.net[-1]], name='mode')  # targets for the loss
        self.is_train = tf.placeholder("bool")  # train or evaluate (not fed anywhere in this class)

        with tf.variable_scope('memory_net'):
            c_names = ['memory_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            # 1.0 keeps this a true division even under Python 2.
            w_initializer = tf.random_normal_initializer(0., 1.0 / self.net[0])
            b_initializer = tf.constant_initializer(0.1)

            self.m_pred = build_layers(self.h, c_names, self.net, w_initializer, b_initializer)

        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.m, logits=self.m_pred)
            )

        with tf.variable_scope('train'):
            self._train_op = tf.train.AdamOptimizer(self.lr, 0.09).minimize(self.loss)

    def remember(self, h, m):
        """Store the pair ``(h, m)``, overwriting the oldest slot when full."""
        idx = self.memory_counter % self.memory_size
        self.memory[idx, :] = np.hstack((h, m))
        self.memory_counter += 1

    def encode(self, h, m):
        """Store ``(h, m)`` and train once every ``training_interval`` entries."""
        self.remember(h, m)
        if self.memory_counter % self.training_interval == 0:
            self.learn()

    def _sample_batch(self):
        """Return ``(h_train, m_train)`` sampled from the filled memory rows."""
        if self.memory_counter > self.memory_size:
            # The buffer has wrapped at least once, so every row holds data.
            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
        else:
            # Rows 1 .. memory_counter-1 are filled; row 0 is still the
            # all-zero placeholder and must not be used as training data.
            filled_rows = np.arange(1, self.memory_counter)
            sample_index = np.random.choice(filled_rows, size=self.batch_size)

        batch_memory = self.memory[sample_index, :]
        h_train = batch_memory[:, 0: self.net[0]]
        m_train = batch_memory[:, self.net[0]:]
        return h_train, m_train

    def learn(self):
        """Run one optimizer step on a random batch and record the loss."""
        if self.memory_counter <= 1:
            # Nothing has been stored yet, so there is nothing to train on.
            return

        h_train, m_train = self._sample_batch()

        _, self.cost = self.sess.run(
            [self._train_op, self.loss],
            feed_dict={self.h: h_train, self.m: m_train},
        )

        # Also trips on NaN, because NaN > 0 evaluates to False.
        assert self.cost > 0, "training loss must be positive"
        self.cost_his.append(self.cost)

    def decode(self, h, k=1, mode='OP'):
        """Predict from ``h`` and return ``k`` candidate binary vectors.

        Args:
            h: 1-D input array; a batch axis is added before feeding it.
            k: Number of candidates to generate.
            mode: ``'OP'`` uses ``knm``; ``'KNN'`` uses ``knn``.

        Returns:
            The candidates from the selected method, or ``None`` (after
            printing a message) when ``mode`` is not recognised.
        """
        h = h[np.newaxis, :]

        m_pred = self.sess.run(self.m_pred, feed_dict={self.h: h})

        # Strings are compared by value; ``is`` only works by accident of interning.
        if mode == 'OP':
            return self.knm(m_pred[0], k)
        elif mode == 'KNN':
            # NOTE(review): knn() receives raw logits here, not sigmoid outputs - confirm intended.
            return self.knn(m_pred[0], k)
        else:
            print("The action selection must be 'OP' or 'KNN'")

    def knm(self, m, k=1):
        """Return up to ``k`` binary vectors obtained by thresholding ``m``.

        ``m`` holds the network output before the sigmoid, so the first
        candidate thresholds at 0 (sigmoid(0) = 0.5). Each further candidate
        thresholds at one of the entries of ``m`` closest to 0, which flips
        that entry relative to the first candidate.
        """
        m_list = []
        m_list.append(1 * (m > 0))

        if k > 1:
            m_abs = abs(m)
            idx_list = np.argsort(m_abs)[:k - 1]
            # Iterating the slice caps the count at len(m), so an oversized
            # k no longer indexes past the end of idx_list.
            for idx in idx_list:
                if m[idx] > 0:
                    # Strict comparison sets this positive entry to 0.
                    m_list.append(1 * (m - m[idx] > 0))
                else:
                    # Non-strict comparison sets this non-positive entry to 1.
                    m_list.append(1 * (m - m[idx] >= 0))

        return m_list

    def knn(self, m, k=1):
        """Return the ``k`` binary vectors closest to ``m`` in squared distance."""
        if len(self.enumerate_actions) == 0:
            import itertools
            # Actions are compared against the network output, so they must
            # have the output width (net[-1]), not the input width.
            self.enumerate_actions = np.array(
                list(map(list, itertools.product([0, 1], repeat=self.net[-1])))
            )

        sqd = ((self.enumerate_actions - m) ** 2).sum(1)
        idx = np.argsort(sqd)
        return self.enumerate_actions[idx[:k]]

    def plot_cost(self):
        """Plot the recorded training loss against the entry count."""
        import matplotlib.pyplot as plt
        plt.plot(np.arange(len(self.cost_his)) * self.training_interval, self.cost_his)
        plt.ylabel('Training Loss')
        plt.xlabel('Time Frames')
        plt.show()

DROO memory.py相关推荐

DROO demo_alternate_weights.py
为方便自己看代码所以放上来,看完就删.完整代码在文章作者的仓库:https://github.com/revenol/DROO. 这个是论文<Deep Reinforcement Learnin ...
DROO main.py
是论文<Deep Reinforcement Learning for Online Ofﬂoading in Wireless Powered Mobile-Edge Computing Ne ...
DROO demo_on_off.py
完整代码在文章作者的仓库:https://github.com/revenol/DROO.为方便自己看代码所以放上来,看完就删. 这个是论文<Deep Reinforcement Learnin ...
DROO optimization.py
为方便自己看代码所以放上来,看完就删.完整代码在文章作者的仓库:https://github.com/revenol/DROO. 这个是论文<Deep Reinforcement Learnin ...
python项目超级大脑-python项目之超级大脑
超级大脑程序说明我们已经为你准备了程序模板:memory.py,模板中导入了必要的模块和一些全局变量,你需要编写一些函数,实现这个游戏. 通过该项目你可以巩固对鼠标事件驱动编程,函数和列表的理解. ...
python paramiko使用_使用python的paramiko模块实现ssh与scp功能
#1. 介绍这篇文章简单地介绍了python的paramiko模块的用法,paramiko实现了SSH协议,能够方便地与远程计算机交互.简单的说,就是你在terminal下执行的如下语句,现在可以通 ...
CV之detectron2：detectron2安装过程记录
CV之detectron2:detectron2安装过程记录 detectron2安装记录 python setup.py build develop Microsoft Windows [版本 10 ...
CMDB学习之三数据采集
判断系统因为是公用的方法,所有要写基类方法使用,首先在插件中创建一个基类将插件文件继承基类思路是创建基类使用handler.cmd ,命令去获取系统信息,然后进行判断,然后去执行磁盘 ,cpu, ...
Django项目：CMDB(服务器硬件资产自动采集系统)--11--07CMDB文件模式测试采集硬件数据...
1 #settings.py 2 # --------01CMDB获取服务器基本信息-------- 3 import os 4 5 BASEDIR = os.path.dirname(os.path ...

DROO memory.py

DROO memory.py相关推荐

最新文章

热门文章