MXNet 定义新激活函数（Custom new activation function）

https://blog.csdn.net/weixin_34260991/article/details/87106463

这里使用比较简单的定义方式，只是在原有的激活函数调用中加入。

准备工作
下载MXNet源代码，确认可以顺利编译通过。推荐在Linux下进行此操作:

https://mxnet.incubator.apache.org/get_started/install.html

编写激活函数先前和先后传递
在src/operator/mshadow_op.h里面，加入新的激活函数向前传递和向后的函数：

/*!
* \brief RBF Unit
* \author Yuzhong Liu
*/
struct rbf {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType x) {
return DType(expf(-x*x));
}
};

struct rbf_grad {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType x, DType a) {
return DType(-2 * x * a);
}
};
添加调用方法
在src/operator/leaky_relu-inl.h里面，激活函数的调用方式：

namespace leakyrelu {
enum LeakyReLUOpInputs {kData, kGamma};
enum LeakyReLUOpOutputs {kOut, kMask};
# 定义新的激活函数名称
enum LeakyReLUOpType {kLeakyReLU, kPReLU, kRReLU, kELU, kRBF};
enum LeakyReLUOpResource {kRandom};
} // namespace leakyrelu

struct LeakyReLUParam : public dmlc::Parameter<LeakyReLUParam> {
// use int for enumeration
int act_type;
float slope;
float lower_bound;
float upper_bound;
DMLC_DECLARE_PARAMETER(LeakyReLUParam) {
DMLC_DECLARE_FIELD(act_type).set_default(leakyrelu::kLeakyReLU)
.add_enum("rrelu", leakyrelu::kRReLU)
.add_enum("leaky", leakyrelu::kLeakyReLU)
.add_enum("prelu", leakyrelu::kPReLU)
.add_enum("elu", leakyrelu::kELU)
# 添加激活函数枚举
.add_enum("rbf", leakyrelu::kRBF)
.describe("Activation function to be applied.");
DMLC_DECLARE_FIELD(slope).set_default(0.25f)
.describe("Init slope for the activation. (For leaky and elu only)");
DMLC_DECLARE_FIELD(lower_bound).set_default(0.125f)
.describe("Lower bound of random slope. (For rrelu only)");
DMLC_DECLARE_FIELD(upper_bound).set_default(0.334f)
.describe("Upper bound of random slope. (For rrelu only)");
}
};

template<typename xpu>
class LeakyReLUOp : public Operator {
public:
explicit LeakyReLUOp(LeakyReLUParam param) {
param_ = param;
}

virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
size_t expected = param_.act_type == leakyrelu::kPReLU ? 2 : 1;
CHECK_EQ(in_data.size(), expected);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 3> data;
Tensor<xpu, 3> out;
Tensor<xpu, 3> mask;
Tensor<xpu, 1> weight;
int n = in_data[leakyrelu::kData].shape_[0];
int k = in_data[leakyrelu::kData].shape_[1];
Shape<3> dshape = Shape3(n, k, in_data[leakyrelu::kData].Size()/n/k);
data = in_data[leakyrelu::kData].get_with_shape<xpu, 3, real_t>(dshape, s);
out = out_data[leakyrelu::kOut].get_with_shape<xpu, 3, real_t>(dshape, s);
if (param_.act_type == leakyrelu::kRReLU) {
mask = out_data[leakyrelu::kMask].get_with_shape<xpu, 3, real_t>(dshape, s);
}
switch (param_.act_type) {
case leakyrelu::kLeakyReLU: {
Assign(out, req[leakyrelu::kOut], F<mshadow_op::xelu>(data, param_.slope));
break;
}
case leakyrelu::kPReLU: {
weight = in_data[leakyrelu::kGamma].get<xpu, 1, real_t>(s);
Assign(out, req[leakyrelu::kOut],
F<mshadow_op::xelu>(data, broadcast<1>(weight, out.shape_)));
break;
}
case leakyrelu::kRReLU: {
if (ctx.is_train) {
Random<xpu>* prnd = ctx.requested[leakyrelu::kRandom].get_random<xpu, real_t>(s);
mask = prnd->uniform(mask.shape_);
mask = mask * (param_.upper_bound - param_.lower_bound) + param_.lower_bound;
Assign(out, req[leakyrelu::kOut], F<mshadow_op::xelu>(data, mask));
} else {
const float slope = (param_.lower_bound + param_.upper_bound) / 2.0f;
Assign(out, req[leakyrelu::kOut], F<mshadow_op::xelu>(data, slope));
}
break;
}
case leakyrelu::kELU: {
Assign(out, req[leakyrelu::kOut], F<mshadow_op::elu>(data, param_.slope));
break;
}
# RBF向前
case leakyrelu::kRBF: {
Assign(out, req[leakyrelu::kOut], F<mshadow_op::rbf>(data));
break;
}
default:
LOG(FATAL) << "Not implmented";
}
}

virtual void Backward(const OpContext & ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
size_t expected = param_.act_type == leakyrelu::kPReLU ? 2 : 1;
CHECK_EQ(out_grad.size(), 1U);
CHECK_EQ(req.size(), expected);
CHECK_EQ(in_data.size(), expected);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 3> output;
Tensor<xpu, 3> data;
Tensor<xpu, 3> gdata;
Tensor<xpu, 3> grad;
Tensor<xpu, 3> mask;
Tensor<xpu, 1> weight;
Tensor<xpu, 1> grad_weight;
int n = out_grad[leakyrelu::kOut].shape_[0];
int k = out_grad[leakyrelu::kOut].shape_[1];
Shape<3> dshape = Shape3(n, k, out_grad[leakyrelu::kOut].Size()/n/k);
grad = out_grad[leakyrelu::kOut].get_with_shape<xpu, 3, real_t>(dshape, s);
gdata = in_grad[leakyrelu::kData].get_with_shape<xpu, 3, real_t>(dshape, s);
output = out_data[leakyrelu::kOut].get_with_shape<xpu, 3, real_t>(dshape, s);
if (param_.act_type == leakyrelu::kRReLU) {
mask = out_data[leakyrelu::kMask].get_with_shape<xpu, 3, real_t>(dshape, s);
}
if (param_.act_type == leakyrelu::kPReLU) {
data = in_data[leakyrelu::kData].get_with_shape<xpu, 3, real_t>(dshape, s);
}
switch (param_.act_type) {
case leakyrelu::kLeakyReLU: {
Assign(gdata, req[leakyrelu::kData], F<mshadow_op::xelu_grad>(output, param_.slope) * grad);
break;
}
case leakyrelu::kPReLU: {
weight = in_data[leakyrelu::kGamma].get<xpu, 1, real_t>(s);
grad_weight = in_grad[leakyrelu::kGamma].get<xpu, 1, real_t>(s);
grad_weight = sumall_except_dim<1>(F<prelu_grad>(data) * grad);
gdata = F<mshadow_op::xelu_grad>(data, broadcast<1>(weight, data.shape_)) * grad;
break;
}
case leakyrelu::kRReLU: {
Assign(gdata, req[leakyrelu::kData], F<mshadow_op::xelu_grad>(output, mask) * grad);
break;
}
case leakyrelu::kELU: {
Assign(gdata, req[leakyrelu::kData], F<mshadow_op::elu_grad>(output, param_.slope) * grad);
break;
}
# RBF向前
case leakyrelu::kRBF: {
data = in_data[leakyrelu::kData].get_with_shape<xpu, 3, real_t>(dshape, s);
Assign(gdata, req[leakyrelu::kData], F<mshadow_op::rbf_grad>(data, output) * grad);
break;
}
default:
LOG(FATAL) << "Not implmented";
}
}

private:
LeakyReLUParam param_;
}; // class LeakyReLUOp
从重新编译，并测试
import mxnet as mx
from mxnet import autograd
a = mx.nd.random_uniform(-1, 1, shape=[3, 3]) +0.5
a.attach_grad()

with autograd.record():
b = mx.nd.LeakyReLU(data=a, act_type='rbf')

print a, b
参考资料
https://mxnet.incubator.apache.org/how_to/new_op.html
http://blog.csdn.net/qq_20965753/article/details/66975622?utm_source=debugrun&utm_medium=referral
---------------------
作者：weixin_34260991
来源：CSDN
原文：https://blog.csdn.net/weixin_34260991/article/details/87106463
版权声明：本文为博主原创文章，转载请附上博文链接！

转载于:https://www.cnblogs.com/jukan/p/10938237.html

MXNet 定义新激活函数（Custom new activation function）相关推荐

Homepage Machine Learning Algorithm 浅谈深度学习中的激活函数 - The Activation Function in Deep Learning
原文地址:http://www.cnblogs.com/rgvb178/p/6055213.html 版权声明:本文为博主原创文章,未经博主允许不得转载. 激活函数的作用首先,激活函数不是真的要去激 ...
浅谈深度学习中的激活函数 - The Activation Function in Deep Learning
原文地址:http://www.cnblogs.com/rgvb178/p/6055213.html 版权声明:本文为博主原创文章,未经博主允许不得转载. 激活函数的作用首先,激活函数不是真的要去激 ...
激活函数（Activation Function）及十大常见激活函数
目录 1 激活函数的概念和作用 1.1 激活函数的概念 1.2 激活函数的作用 1.3 通俗地理解一下激活函数(图文结合) 1.3.1 无激活函数的神经网络 1.3.2 带激活函数的神经网络 2 神经 ...
深度学习中常见的10种激活函数（Activation Function）总结
目录一:简介二:为什么要用激活函数三:激活函数的分类四:常见的几种激活函数 4.1.Sigmoid函数 4.2.Tanh函数 4.3.ReLU函数 4.4.Leaky Relu函数 4.5.P ...
[TensorFlow 学习笔记-06]激活函数(Activation Function)
[版权说明] TensorFlow 学习笔记参考: 李嘉璇著 TensorFlow技术解析与实战黄文坚唐源著 TensorFlow实战郑泽宇顾思宇著 TensorFlow实战Googl ...
激活函数（activation function）
激活函数activation function 激活函数的介绍阶跃函数(Step Function)--- 最简单的二分类非线性激活函数开始 mish激活函数激活函数的介绍首先看一个普通的神经网 ...
激活函数 activation function
文章目录激活函数 activation function Sigmoid Sigmoid 反向传播 Tanh ReLU Dead ReLU Problem 产生的原因激活函数 activation ...
Pytorch——激活函数(Activation Function)
文章目录 1.非线性方程 2.激活函数 3.常用选择 4.代码 1.非线性方程我们为什么要使用激励函数? 用简单的语句来概括. 就是因为, 现实并没有我们想象的那么美好, 它是残酷多变的. 哈哈, ...
各种activation function(激活函数) 简介
之前在使用activation function的时候只是根据自己的经验来用,例如二分类使用sigmoid或者softmax,多分类使用softmax,Dense一般都是Relu,例如tanh几乎没用 ...

MXNet 定义新激活函数（Custom new activation function）

MXNet 定义新激活函数（Custom new activation function）相关推荐

最新文章

热门文章