grad-cam用于SSD目标检测

参考链接: jacobgil/pytorch-grad-cam (https://github.com/jacobgil/pytorch-grad-cam)

代码:

import torch
import argparse
import cv2
import numpy as np
import torch.nn as nn
from torch.autograd import Function
from torchvision import models, transforms
from ssd import SSD
from nets.ssd import get_ssd
from PIL import Image, ImageDraw
from utils.box_utils import letterbox_image,ssd_correct_boxes
# Per-channel offsets subtracted from the letterboxed image before it is fed
# to the network (see main()).
MEANS = (104, 117, 123)


class FeatureExtractor():
    """Run a module child by child, capturing selected activations.

    For every direct child whose name is listed in ``target_layers`` the
    child's output is recorded and a hook is registered on it so that the
    gradient flowing into that output during ``backward()`` is stored in
    ``self.gradients``.
    """

    def __init__(self, model, target_layers):
        # The original author's example: FeatureExtractor(model.layer4, ["2"])
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Tensor hook: remember the gradient of a captured activation."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Return ``(captured_activations, final_output)`` for input ``x``."""
        outputs = []
        self.gradients = []  # drop gradients left over from a previous pass
        for name, module in self.model._modules.items():
            x = module(x)
            if name in self.target_layers:
                x.register_hook(self.save_gradient)
                outputs.append(x)
        return outputs, x


class ModelOutputs():
    """Forward pass that also exposes intermediate activations and gradients.

    Provides:
    1. The network output.
    2. Activations from the targeted intermediate layers.
    3. Gradients of those activations (after ``backward()``).

    The model is walked through its direct children in registration order,
    so it only works for networks whose forward pass is that plain chain
    (the original author's example is a torchvision ResNet-50 with
    ``feature_module=model.layer4``).
    """

    def __init__(self, model, feature_module, target_layers):
        self.model = model
        self.feature_module = feature_module
        self.feature_extractor = FeatureExtractor(self.feature_module, target_layers)

    def get_gradients(self):
        """Return the list of gradients captured during the last backward pass."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Return ``(target_activations, output)`` for input ``x``.

        Raises:
            ValueError: if ``feature_module`` is not a direct child of the
                model, in which case no activation could be captured.
        """
        target_activations = None
        for name, module in self.model._modules.items():
            if module is self.feature_module:
                target_activations, x = self.feature_extractor(x)
            elif "avgpool" in name.lower():
                x = module(x)
                # Flatten the pooled map so the following layer receives
                # a (batch, features) tensor.
                x = x.view(x.size(0), -1)
            else:
                x = module(x)
        if target_activations is None:
            raise ValueError(
                "feature_module is not a direct child of the model; "
                "no activations were captured")
        return target_activations, x


def preprocess_image(img):
    """Convert an (H, W, RGB) numpy image into a normalised batch tensor.

    The array is converted with ``ToTensor``, normalised with the mean/std
    values below and given a leading batch dimension.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    return preprocessing(img.copy()).unsqueeze(0)


def show_cam_on_image(img, mask):
    """Overlay ``mask`` as a JET heat map on ``img`` and return a uint8 image.

    ``mask`` is scaled by 255 before colour mapping, so it is expected to be
    in [0, 1]; ``img`` is added to the heat map after the heat map has been
    scaled to [0, 1].
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)


class GradCam:
    """Grad-CAM: class-activation map from gradient-weighted feature maps."""

    def __init__(self, model, feature_module, target_layer_names, use_cuda):
        # The original author's example:
        #   GradCam(model=model, feature_module=model.layer4,
        #           target_layer_names=["2"], use_cuda=args.use_cuda)
        self.model = model
        self.feature_module = feature_module
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.extractor = ModelOutputs(self.model, self.feature_module, target_layer_names)

    def forward(self, input_img):
        # Not called anywhere in this file; the print was added by the
        # original author to demonstrate exactly that.
        print("林麻子".center(50,'-'))
        return self.model(input_img)

    def __call__(self, input_img, target_category=None):
        """Compute the Grad-CAM heat map for ``input_img``.

        Args:
            input_img: batch tensor; only the first sample is used.
            target_category: index of the score to explain. When ``None``
                the index of the highest score is used.

        Returns:
            2-D float array resized to the input's spatial size and
            min-max normalised (all zeros if the map is constant).
        """
        if self.cuda:
            input_img = input_img.cuda()

        features, output = self.extractor(input_img)

        if target_category is None:
            target_category = int(np.argmax(output.detach().cpu().numpy()))

        # Same value as sum(one_hot * output) for a one-hot row vector.
        loss = output[0, target_category]

        self.feature_module.zero_grad()
        self.model.zero_grad()
        loss.backward()

        grads_val = self.extractor.get_gradients()[0].detach().cpu().numpy()
        target = features[-1].detach().cpu().numpy()[0]  # (C, h, w)

        # Channel weight = spatial mean of that channel's gradient.
        weights = np.mean(grads_val, axis=(2, 3))[0]  # (C,)

        # Weighted sum of the feature maps, followed by ReLU.
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)
        cam = np.maximum(cam, 0)

        # cv2.resize expects (width, height), i.e. the reverse of (H, W).
        out_size = (int(input_img.shape[3]), int(input_img.shape[2]))
        cam = cv2.resize(cam, out_size)

        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:  # avoid 0/0 -> NaN when the map is constant
            cam = cam / peak
        return cam


class GuidedBackpropReLU(Function):
    """ReLU whose backward pass keeps only positive gradients at positive inputs."""

    @staticmethod
    def forward(ctx, input_img):
        output = torch.clamp(input_img, min=0.0)
        ctx.save_for_backward(input_img)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input_img, = ctx.saved_tensors
        positive_input = (input_img > 0).type_as(grad_output)
        positive_grad = (grad_output > 0).type_as(grad_output)
        return grad_output * positive_input * positive_grad


class GuidedBackpropReLU_Module_by_cxq(nn.Module):
    """``nn.Module`` wrapper so GuidedBackpropReLU can replace ``nn.ReLU`` layers."""

    def __init__(self):
        super(GuidedBackpropReLU_Module_by_cxq, self).__init__()

    def forward(self, input):
        return GuidedBackpropReLU.apply(input)

    def extra_repr(self):
        """Text shown for this layer when the model is printed."""
        return '我是由cxq实现的用于自定义GuidedBackpropReLU的网络模块...'


class GuidedBackpropReLUModel:
    """Guided backpropagation: gradient of a score w.r.t. the input image.

    On construction every ReLU in ``model`` is replaced in place by
    ``GuidedBackpropReLU_Module_by_cxq``.
    """

    def __init__(self, model, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self._replace_relu(self.model)

    @staticmethod
    def _replace_relu(module_top):
        """Recursively swap every ReLU child for the guided-backprop module."""
        for idx, module in module_top._modules.items():
            GuidedBackpropReLUModel._replace_relu(module)
            if isinstance(module, nn.ReLU):
                # Re-assigning an existing key does not resize the dict, so
                # this is safe while iterating.
                module_top._modules[idx] = GuidedBackpropReLU_Module_by_cxq()

    def __call__(self, input_img, target_category=None):
        """Return d(score)/d(input) for the first sample as a numpy array.

        The model is expected to return a sequence whose element ``[1]`` is
        the score tensor (the original author notes a shape of
        ``[1, 8732, 3]`` for the SSD used in ``main``).

        Args:
            input_img: batch tensor. It is detached first, so the caller's
                tensor and its ``.grad`` are never modified.
            target_category: when ``None`` the maximum over all scores is
                back-propagated. Otherwise the maximum over
                ``scores[..., target_category]`` is used (assumes the last
                axis indexes classes -- TODO confirm for other models).

        Returns:
            Gradient array with the batch dimension removed.
        """
        # Work on a fresh leaf tensor: a tensor that was moved to the GPU, or
        # was produced by any other op, is a non-leaf and would have
        # ``.grad is None`` after backward().
        input_img = input_img.detach()
        if self.cuda:
            input_img = input_img.cuda()
        input_img.requires_grad_(True)

        output = self.model(input_img)[1]
        scores = output if target_category is None else output[..., target_category]
        loss = torch.max(scores)

        self.model.zero_grad()
        loss.backward()

        return input_img.grad.detach().cpu().numpy()[0]


def get_args():
    """Parse command-line options; CUDA is only kept enabled if available."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='Use NVIDIA GPU acceleration')
    parser.add_argument('--image-path', type=str, default='3.jpg',
                        help='Input image path')
    parser.add_argument('--weights-path', type=str,
                        default="F:/Iris_SSD_small/ssd-pytorch-master/logs/Epoch50-Loc0.0260-Conf0.1510.pth",
                        help='Path to the trained SSD state dict')
    args = parser.parse_args()
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print("Using GPU for acceleration")
    else:
        print("Using CPU for computation")

    return args


def deprocess_image(img):
    """Turn a gradient array into a displayable uint8 image.

    Standardises ``img`` (zero mean, unit std), applies ``y = 0.1 * x + 0.5``,
    clips to [0, 1] and maps to [0, 255].
    See https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py#L65
    """
    img = img - np.mean(img)
    img = img / (np.std(img) + 1e-5)
    img = img * 0.1
    img = img + 0.5
    img = np.clip(img, 0, 1)
    return np.uint8(img * 255)


def main():
    """Run guided backpropagation on an SSD model and save ``gb.jpg``.

    1. Loads the SSD network and its trained weights.
    2. Letterboxes the input image to 300x300 and subtracts MEANS.
    3. Back-propagates the maximum score to the input image.
    4. Crops away the letterbox padding, resizes the gradient image back to
       the original size and writes it to ``gb.jpg``.

    The Grad-CAM path (GradCam with ``feature_module=model.layer4``) from
    the reference implementation is not executed here: ModelOutputs walks a
    ResNet-style chain of children, which this script does not set up for
    the SSD model.
    """
    args = get_args()

    # NOTE(review): ``ssd`` is only used as a holder for ``.net`` below;
    # confirm whether constructing SSD() is needed at all.
    ssd = SSD()
    model = get_ssd("train", 3)
    # Load onto the CPU so the script also runs on machines without CUDA;
    # GuidedBackpropReLUModel moves the model to the GPU when requested.
    state_dict = torch.load(args.weights_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    ssd.net = torch.nn.DataParallel(model.eval()).cpu()
    model = ssd.net.module

    gb_model = GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)

    image = Image.open(args.image_path).convert('RGB')
    w, h = 300, 300  # network input size
    crop_img = np.array(letterbox_image(image, (w, h)), dtype=np.float32)  # (300, 300, 3)

    # HWC -> CHW, add the batch dimension; values stay in the 0-255 range
    # minus MEANS.
    chw = np.transpose(crop_img - np.array(MEANS, dtype=np.float32), (2, 0, 1))
    photo = torch.from_numpy(np.ascontiguousarray(chw[None])).float()

    gb = gb_model(photo, target_category=None)  # (3, 300, 300)
    gb = deprocess_image(gb.transpose((1, 2, 0)))  # (300, 300, 3)

    # Region of the 300x300 canvas occupied by the aspect-preserving resize
    # of the original image (this script assumes letterbox_image centres it).
    iw, ih = image.size
    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)
    left, top = (w - nw) // 2, (h - nh) // 2
    box = (left, top, left + nw, top + nh)

    result4PIL = Image.fromarray(np.uint8(gb))
    result4PIL = result4PIL.resize((iw, ih), box=box)
    result4PIL.save('gb.jpg')


if __name__ == '__main__':
    main()

# Usage: python gradcam.py --image-path 1.jpg
# Usage: python gradcam.py --image-path ./examples/both.png

运行结果截图展示:

grad-cam用于SSD目标检测相关推荐

Pytorch搭建SSD目标检测平台
学习前言什么是SSD目标检测算法源码下载 SSD实现思路一.预测部分 1.主干网络介绍 2.从特征获取预测结果 3.预测结果的解码 4.在原图上进行绘制二.训练部分 1.真实框的处理 2.利用 ...
基于Grad-CAM与KL损失的SSD目标检测算法
基于Grad-CAM与KL损失的SSD目标检测算法人工智能技术与咨询来源:《电子学报》,作者侯庆山等摘要: 鉴于Single Shot Multibox Detector (SSD ...
SSD目标检测的个人总结（1）—— 锚框的生成
SSD目标检测的个人总结(1)-- 锚框的生成前言锚框锚框的生成锚框的绘制前言沐神的代码看了很久.B站上的视频也刷了很多遍,感叹下自己的基础确实不怎么扎实,锚框部分的底层代码几乎是一行行撸 ...
睿智的目标检测23——Pytorch搭建SSD目标检测平台
睿智的目标检测23--Pytorch搭建SSD目标检测平台学习前言什么是SSD目标检测算法源码下载 SSD实现思路一.预测部分 1.主干网络介绍 2.从特征获取预测结果 3.预测结果的解码 4 ...
基于神经网络的目标检测论文之目标检测方法：改进的SSD目标检测算法
4.2 改进的SSD 上一章我们了解到,物体识别检测算法是在传统CNN算法基础上加上目标区域建议策略和边框回归算法得到的.前辈们的工作主要体现在目标区域建议的改进策略上,从最开始的穷举建议框,到划分图 ...
华为开源自研AI框架昇思MindSpore应用案例：SSD目标检测
目录一.环境准备 1.进入ModelArts官网 2.使用CodeLab体验Notebook实例二.环境准备三.数据准备与处理数据采样数据集创建四.模型构建五.损失函数六.Metric ...
DisARM：用于3D目标检测的位移感知关联模块（CVPR2022）
作者丨花椒壳壳@知乎来源丨https://zhuanlan.zhihu.com/p/490441536 编辑丨3D视觉工坊论文标题:DisARM: Displacement Aware Relat ...
【北大微软】用于视频目标检测的记忆增强的全局-局部聚合
关注上方"深度学习技术前沿",选择"星标公众号", 资源干货,第一时间送达! 北大&MSRA,入选 CVPR 2020,ImageNet VID SOT ...
Localization Distillation for Dense Object Detection（用于密集目标检测的定位蒸馏）CVPR2022
最前面是论文翻译,中间是背景+问题+方法步骤+实验过程,最后是文中的部分专业名词介绍(水平线分开,能力有限,部分翻译可能不太准确) 摘要: 知识蒸馏(KD)在目标检测中具有强大的学习紧凑模型的能力.以 ...

grad-cam用于SSD目标检测

grad-cam用于SSD目标检测相关推荐

最新文章

热门文章