module 'mxnet.symbol' has no attribute 'LSoftmax'

Newer versions of MXNet no longer seem to ship this layer. The workaround is to register it yourself as a custom operator (a full implementation follows, with a usage sketch after the listing):

(I haven't fully worked out how to use it in training yet.)

References:

https://github.com/DHCZ/tool_code/blob/79952e2612ef882d1bd36d637556b0c2fad547ce/MXnet/lsoftmax.py

https://github.com/540928898/Re-idDQN/blob/7ff9ef4180a4aa994dce2e1349ee1a0ff7f49e07/rl-multishot-reid-master/baseline/lsoftmax.py
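
For context, both implementations below follow the formulation in "Large-Margin Softmax Loss for Convolutional Neural Networks" (Liu et al., ICML 2016). For a sample x_i with target class y_i, the target logit is rescaled as

\[
f_{y_i} = \lVert w_{y_i}\rVert \, \lVert x_i\rVert \, \psi(\theta), \qquad
\psi(\theta) = (-1)^k \cos(m\theta) - 2k, \quad
\theta \in \Bigl[\tfrac{k\pi}{m},\ \tfrac{(k+1)\pi}{m}\Bigr],
\]

with cos(mθ) expanded through the binomial identity

\[
\cos(m\theta) = \sum_{p=0}^{\lfloor m/2\rfloor} (-1)^p \binom{m}{2p} \cos^{m-2p}\theta \, \bigl(1-\cos^2\theta\bigr)^p.
\]

For stable training the output is blended with the unmodified logit, f = (f_margin + β·f_plain) / (1 + β), and β is multiplied by scale after every mini-batch until it reaches beta_min. This is exactly what calc_cos_mt, find_k, and the β bookkeeping in the code below implement.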

import os
import math
import mxnet as mx
import numpy as np

# MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU
os.environ['MXNET_CPU_WORKER_NTHREADS'] = '2'


class LSoftmaxOp(mx.operator.CustomOp):
    '''LSoftmax from <Large-Margin Softmax Loss for Convolutional Neural Networks>'''

    def __init__(self, margin, beta, beta_min, scale):
        self.margin = int(margin)
        self.beta = float(beta)
        self.beta_min = float(beta_min)
        self.scale = float(scale)
        self.c_map = []
        self.k_map = []
        c_m_n = lambda m, n: math.factorial(n) / math.factorial(m) / math.factorial(n-m)
        for i in range(margin+1):
            self.c_map.append(c_m_n(i, margin))
            self.k_map.append(math.cos(i * math.pi / margin))

    def find_k(self, cos_t):
        '''find k for cos(theta)'''
        # for numeric issue
        eps = 1e-5
        le = lambda x, y: x < y or abs(x-y) < eps
        for i in range(self.margin):
            if le(self.k_map[i+1], cos_t) and le(cos_t, self.k_map[i]):
                return i
        raise ValueError('can not find k for cos_t = %f' % cos_t)

    def calc_cos_mt(self, cos_t):
        '''calculate cos(m*theta)'''
        cos_mt = 0
        sin2_t = 1 - cos_t * cos_t
        flag = -1
        for p in range(self.margin // 2 + 1):
            flag *= -1
            cos_mt += flag * self.c_map[2*p] * pow(cos_t, self.margin-2*p) * pow(sin2_t, p)
        return cos_mt

    def forward(self, is_train, req, in_data, out_data, aux):
        assert len(in_data) == 3
        assert len(out_data) == 1
        assert len(req) == 1
        x, label, w = in_data
        x = x.asnumpy()
        w = w.asnumpy()
        label = label.asnumpy()
        eps = 1e-5
        # original fully connected
        out = x.dot(w.T)
        if is_train:
            # large margin fully connected
            n = label.shape[0]
            w_norm = np.linalg.norm(w, axis=1)
            x_norm = np.linalg.norm(x, axis=1)
            for i in range(n):
                j = yi = int(label[i])
                f = out[i, yi]
                cos_t = f / (w_norm[yi] * x_norm[i] + eps)
                # calc k and cos_mt
                k = self.find_k(cos_t)
                cos_mt = self.calc_cos_mt(cos_t)
                # f_i_j = (\beta * f_i_j + fo_i_j) / (1 + \beta)
                fo_i_j = f
                f_i_j = (pow(-1, k) * cos_mt - 2*k) * (w_norm[yi] * x_norm[i])
                out[i, yi] = (f_i_j + self.beta * fo_i_j) / (1 + self.beta)
        self.assign(out_data[0], req[0], mx.nd.array(out))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        assert len(in_data) == 3
        assert len(out_grad) == 1
        assert len(in_grad) == 3
        assert len(req) == 3
        x, label, w = in_data
        x = x.asnumpy()
        w = w.asnumpy()
        label = label.asnumpy()
        o_grad = out_grad[0].asnumpy()
        # original fully connected
        x_grad = o_grad.dot(w)
        w_grad = o_grad.T.dot(x)
        # large margin fully connected
        n = label.shape[0]  # batch size
        m = w.shape[0]  # number of classes
        margin = self.margin  # margin
        feature_dim = w.shape[1]  # feature dimension
        cos_t = np.zeros(n, dtype=np.float32)  # cos(theta)
        cos_mt = np.zeros(n, dtype=np.float32)  # cos(margin * theta)
        sin2_t = np.zeros(n, dtype=np.float32)  # sin(theta) ^ 2
        fo = np.zeros(n, dtype=np.float32)  # fo_i = dot(x_i, w_yi)
        k = np.zeros(n, dtype=np.int32)
        x_norm = np.linalg.norm(x, axis=1)
        w_norm = np.linalg.norm(w, axis=1)
        eps = 1e-5
        for i in range(n):
            j = yi = int(label[i])
            f = w[yi].dot(x[i])
            cos_t[i] = f / (w_norm[yi] * x_norm[i] + eps)
            k[i] = self.find_k(cos_t[i])
            cos_mt[i] = self.calc_cos_mt(cos_t[i])
            sin2_t[i] = 1 - cos_t[i]*cos_t[i]
            fo[i] = f
        # gradient w.r.t. x_i
        for i in range(n):
            # df / dx at x = x_i, w = w_yi
            j = yi = int(label[i])
            dcos_dx = w[yi] / (w_norm[yi]*x_norm[i] + eps) - x[i] * fo[i] / (w_norm[yi]*pow(x_norm[i], 3) + eps)
            dsin2_dx = -2 * cos_t[i] * dcos_dx
            dcosm_dx = margin*pow(cos_t[i], margin-1) * dcos_dx  # p = 0
            flag = 1
            for p in range(1, margin // 2 + 1):
                flag *= -1
                dcosm_dx += flag * self.c_map[2*p] * (
                    p*pow(cos_t[i], margin-2*p)*pow(sin2_t[i], p-1)*dsin2_dx +
                    (margin-2*p)*pow(cos_t[i], margin-2*p-1)*pow(sin2_t[i], p)*dcos_dx)
            df_dx = (pow(-1, k[i]) * cos_mt[i] - 2*k[i]) * w_norm[yi] / (x_norm[i] + eps) * x[i] + \
                    pow(-1, k[i]) * w_norm[yi] * x_norm[i] * dcosm_dx
            alpha = 1 / (1 + self.beta)
            x_grad[i] += alpha * o_grad[i, yi] * (df_dx - w[yi])
        # gradient w.r.t. w_j
        for j in range(m):
            dw = np.zeros(feature_dim, dtype=np.float32)
            for i in range(n):
                yi = int(label[i])
                if yi == j:
                    # df / dw at x = x_i, w = w_yi and yi == j
                    dcos_dw = x[i] / (w_norm[yi]*x_norm[i] + eps) - w[yi] * fo[i] / (x_norm[i]*pow(w_norm[yi], 3) + eps)
                    dsin2_dw = -2 * cos_t[i] * dcos_dw
                    dcosm_dw = margin*pow(cos_t[i], margin-1) * dcos_dw  # p = 0
                    flag = 1
                    for p in range(1, margin // 2 + 1):
                        flag *= -1
                        dcosm_dw += flag * self.c_map[2*p] * (
                            p*pow(cos_t[i], margin-2*p)*pow(sin2_t[i], p-1)*dsin2_dw +
                            (margin-2*p)*pow(cos_t[i], margin-2*p-1)*pow(sin2_t[i], p)*dcos_dw)
                    df_dw_j = (pow(-1, k[i]) * cos_mt[i] - 2*k[i]) * x_norm[i] / (w_norm[yi] + eps) * w[yi] + \
                              pow(-1, k[i]) * w_norm[yi] * x_norm[i] * dcosm_dw
                    dw += o_grad[i, yi] * (df_dw_j - x[i])
            alpha = 1 / (1 + self.beta)
            w_grad[j] += alpha * dw
        self.assign(in_grad[0], req[0], mx.nd.array(x_grad))
        self.assign(in_grad[2], req[2], mx.nd.array(w_grad))
        # dirty hack, should also work for multi devices
        self.beta *= self.scale
        self.beta = max(self.beta, self.beta_min)


@mx.operator.register("LSoftmax")
class LSoftmaxProp(mx.operator.CustomOpProp):

    def __init__(self, num_hidden, beta, margin, scale=1, beta_min=0):
        super(LSoftmaxProp, self).__init__(need_top_grad=True)
        self.margin = int(margin)
        self.num_hidden = int(num_hidden)
        self.beta = float(beta)
        self.beta_min = float(beta_min)
        self.scale = float(scale)

    def list_arguments(self):
        return ['data', 'label', 'weight']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        assert len(in_shape) == 3, "LSoftmaxOp input data: [data, label, weight]"
        dshape = in_shape[0]
        lshape = in_shape[1]
        assert len(dshape) == 2, "data shape should be (batch_size, feature_dim)"
        assert len(lshape) == 1, "label shape should be (batch_size,)"
        wshape = (self.num_hidden, dshape[1])
        oshape = (dshape[0], self.num_hidden)
        return [dshape, lshape, wshape], [oshape,], []

    def create_operator(self, ctx, shapes, dtypes):
        return LSoftmaxOp(margin=self.margin, beta=self.beta, beta_min=self.beta_min, scale=self.scale)


def test_op():
    """test LSoftmax Operator"""
    # build symbol
    batch_size = cmd_args.batch_size
    embedding_dim = cmd_args.embedding_dim
    num_classes = cmd_args.num_classes
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    weight = mx.sym.Variable('weight')
    args = {
        'data': np.random.normal(0, 1, (batch_size, embedding_dim)),
        'weight': np.random.normal(0, 1, (num_classes, embedding_dim)),
        'label': np.random.choice(num_classes, batch_size),
    }
    if cmd_args.op_impl == 'py':
        symbol = mx.sym.Custom(data=data, label=label, weight=weight, num_hidden=num_classes,
                               beta=cmd_args.beta, margin=cmd_args.margin, scale=cmd_args.scale,
                               op_type='LSoftmax', name='lsoftmax')
    else:
        symbol = mx.sym.LSoftmax(data=data, label=label, weight=weight, num_hidden=num_classes,
                                 margin=cmd_args.margin, beta=cmd_args.beta, scale=cmd_args.scale,
                                 name='lsoftmax')
    data_shape = (batch_size, embedding_dim)
    label_shape = (batch_size,)
    weight_shape = (num_classes, embedding_dim)
    ctx = mx.cpu() if cmd_args.op_impl == 'py' else mx.gpu()
    executor = symbol.simple_bind(ctx=ctx, data=data_shape, label=label_shape, weight=weight_shape)

    def forward(data, label, weight):
        data = mx.nd.array(data, ctx=ctx)
        label = mx.nd.array(label, ctx=ctx)
        weight = mx.nd.array(weight, ctx=ctx)
        executor.forward(is_train=True, data=data, label=label, weight=weight)
        return executor.output_dict['lsoftmax_output'].asnumpy()

    def backward(out_grad):
        executor.backward(out_grads=[mx.nd.array(out_grad, ctx=ctx)])
        return executor.grad_dict

    def gradient_check(name, i, j):
        '''gradient check on x[i, j]'''
        eps = 1e-4
        threshold = 1e-2
        reldiff = lambda a, b: abs(a-b) / (abs(a) + abs(b))
        # calculate by backward
        output = forward(data=args['data'], weight=args['weight'], label=args['label'])
        grad_dict = backward(output)
        grad = grad_dict[name].asnumpy()[i, j]
        # calculate by \delta f / 2 * eps
        loss = lambda x: np.square(x).sum() / 2
        args[name][i, j] -= eps
        loss1 = loss(forward(data=args['data'], weight=args['weight'], label=args['label']))
        args[name][i, j] += 2 * eps
        loss2 = loss(forward(data=args['data'], weight=args['weight'], label=args['label']))
        args[name][i, j] -= eps  # restore the original value
        grad_expect = (loss2 - loss1) / (2 * eps)
        # check
        rel_err = reldiff(grad_expect, grad)
        if rel_err > threshold:
            print('gradient check failed')
            print('expected %lf given %lf, relative error %lf' % (grad_expect, grad, rel_err))
            return False
        else:
            print('gradient check pass')
            return True

    # test forward
    output = forward(data=args['data'], weight=args['weight'], label=args['label'])
    diff = args['data'].dot(args['weight'].T) - output
    # test backward
    # gradient check on data
    data_gc_pass = 0
    for i in range(args['data'].shape[0]):
        for j in range(args['data'].shape[1]):
            print('gradient check on data[%d, %d]' % (i, j))
            if gradient_check('data', i, j):
                data_gc_pass += 1
    # gradient check on weight
    weight_gc_pass = 0
    for i in range(args['weight'].shape[0]):
        for j in range(args['weight'].shape[1]):
            print('gradient check on weight[%d, %d]' % (i, j))
            if gradient_check('weight', i, j):
                weight_gc_pass += 1
    print('===== Summary =====')
    print('gradient on data pass ratio is %lf' % (float(data_gc_pass) / args['data'].size))
    print('gradient on weight pass ratio is %lf' % (float(weight_gc_pass) / args['weight'].size))


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=32, help="test batch size")
    parser.add_argument('--num-classes', type=int, default=10, help="test number of classes")
    parser.add_argument('--embedding-dim', type=int, default=3, help="test embedding dimension")
    parser.add_argument('--margin', type=int, default=2, help="test lsoftmax margin")
    parser.add_argument('--beta', type=float, default=10, help="test lsoftmax beta")
    parser.add_argument('--scale', type=float, default=1, help="beta scale of every mini-batch")
    parser.add_argument('--op-impl', type=str, choices=['py', 'cpp'], default='py', help="test op implementation")
    cmd_args = parser.parse_args()
    print(cmd_args)
    # check
    if cmd_args.op_impl == 'cpp':
        try:
            op_creator = mx.sym.LSoftmax
        except AttributeError:
            print('No cpp operator for LSoftmax, skip test')
            import sys
            sys.exit(0)
    test_op()
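
Here is a minimal sketch of how the registered op could be wired into a classification symbol. Only op_type='LSoftmax', the data/label/weight argument names, and the hyperparameter names come from the listing above; the variable names and the SoftmaxOutput head are my assumptions:

import mxnet as mx

# Hypothetical wiring of the custom op into a classification head.
num_classes = 10                              # assumed class count
data = mx.sym.Variable('data')                # embeddings: (batch, feature_dim)
label = mx.sym.Variable('softmax_label')
weight = mx.sym.Variable('lsoftmax_weight')   # learned: (num_classes, feature_dim)

fc = mx.sym.Custom(data=data, label=label, weight=weight,
                   num_hidden=num_classes, margin=2, beta=10,
                   scale=0.99, beta_min=0,
                   op_type='LSoftmax', name='lsoftmax')
net = mx.sym.SoftmaxOutput(data=fc, label=label, name='softmax')

Remember to import the module containing the operator first, so that the @mx.operator.register("LSoftmax") decorator runs before the symbol is built.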

The code above comes from what appears to be a third-party extension, not an operator that ships with MXNet itself:

https://github.com/luoyetx/mx-lsoftmax

An L-Softmax implementation for PyTorch:

https://github.com/jihunchoi/lsoftmax-pytorch/blob/master/lsoftmax.py

# Note: this code targets the pre-0.4 PyTorch Variable API.
import math

import torch
from torch import nn
from torch.autograd import Variable
from scipy.special import binom


class LSoftmaxLinear(nn.Module):

    def __init__(self, input_dim, output_dim, margin):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.margin = margin

        self.weight = nn.Parameter(torch.FloatTensor(input_dim, output_dim))

        self.divisor = math.pi / self.margin
        self.coeffs = binom(margin, range(0, margin + 1, 2))
        self.cos_exps = range(self.margin, -1, -2)
        self.sin_sq_exps = range(len(self.cos_exps))
        self.signs = [1]
        for i in range(1, len(self.sin_sq_exps)):
            self.signs.append(self.signs[-1] * -1)

    def reset_parameters(self):
        # note: not called in __init__, so the caller must invoke it
        nn.init.kaiming_normal(self.weight.data.t())

    def find_k(self, cos):
        acos = cos.acos()
        k = (acos / self.divisor).floor().detach()
        return k

    def forward(self, input, target=None):
        if self.training:
            assert target is not None
            logit = input.matmul(self.weight)
            batch_size = logit.size(0)
            logit_target = logit[range(batch_size), target]
            weight_target_norm = self.weight[:, target].norm(p=2, dim=0)
            input_norm = input.norm(p=2, dim=1)
            # norm_target_prod: (batch_size,)
            norm_target_prod = weight_target_norm * input_norm
            # cos_target: (batch_size,)
            cos_target = logit_target / (norm_target_prod + 1e-10)
            sin_sq_target = 1 - cos_target**2

            num_ns = self.margin//2 + 1
            # coeffs, cos_exps, sin_sq_exps, signs: (num_ns,)
            coeffs = Variable(input.data.new(self.coeffs))
            cos_exps = Variable(input.data.new(self.cos_exps))
            sin_sq_exps = Variable(input.data.new(self.sin_sq_exps))
            signs = Variable(input.data.new(self.signs))

            cos_terms = cos_target.unsqueeze(1) ** cos_exps.unsqueeze(0)
            sin_sq_terms = (sin_sq_target.unsqueeze(1)
                            ** sin_sq_exps.unsqueeze(0))

            cosm_terms = (signs.unsqueeze(0) * coeffs.unsqueeze(0)
                          * cos_terms * sin_sq_terms)
            cosm = cosm_terms.sum(1)
            k = self.find_k(cos_target)

            ls_target = norm_target_prod * (((-1)**k * cosm) - 2*k)
            logit[range(batch_size), target] = ls_target

            return logit
        else:
            assert target is None
            return input.matmul(self.weight)
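
A minimal usage sketch, assuming a PyTorch version contemporary with the Variable API above; the sizes, margin, and loss are illustrative, and only the LSoftmaxLinear interface comes from the code itself:

import torch
from torch import nn

embedding_dim, num_classes, margin = 128, 10, 4   # assumed sizes
head = LSoftmaxLinear(embedding_dim, num_classes, margin=margin)
head.reset_parameters()                           # weights are not initialized in __init__

features = torch.randn(32, embedding_dim)         # a batch of embeddings
labels = torch.randint(0, num_classes, (32,))

head.train()
logits = head(features, target=labels)            # margin applied to target-class logits
loss = nn.functional.cross_entropy(logits, labels)

head.eval()
eval_logits = head(features)                      # plain input.matmul(weight) at inference

The layer replaces the final nn.Linear of a network; the target labels are passed only during training so that the margin is applied to the target-class logits.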
