我们将通过图像分类器上的示例来探讨对抗样本攻击这一主题。具体而言，我们将使用最流行的攻击方法之一——快速梯度符号攻击（FGSM）——来欺骗 MNIST 分类器。



from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

# Perturbation magnitudes to evaluate. 0 adds no perturbation, so that run
# measures the model's accuracy on the unmodified test images.
epsilons = [0, .05, .1, .15, .2, .25, .3]
# Path of the pretrained LeNet weights that are loaded with torch.load().
pretrained_model = "../data/lenet_mnist_model.pth"
# Ask for the GPU; the device selection below still falls back to the CPU
# when CUDA is not available.
use_cuda = True

# LeNet Model definition
class Net(nn.Module):
    """Small LeNet-style CNN that outputs log-probabilities over 10 classes.

    The network takes single-channel images (``conv1`` has one input
    channel). The flatten step hard-codes 320 features per sample, which is
    what two 5x5 convolutions each followed by 2x2 max-pooling produce for
    28x28 inputs (20 channels x 4 x 4).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return ``log_softmax`` scores of shape ``(N, 10)`` for batch ``x``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten the feature maps to (N, 320) for the fully connected layers.
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Functional dropout follows the module's train/eval state explicitly.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


# MNIST Test dataset and dataloader declaration
# One image per batch and a shuffled order: test() inspects each sample
# individually through .item() on its prediction and label.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        download=True,
        transform=transforms.Compose([transforms.ToTensor(),]),
    ),
    batch_size=1,
    shuffle=True,
)

# Define what device we are using
print("CUDA Available: ", torch.cuda.is_available())
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Initialize the network
model = Net().to(device)

# Load the pretrained model
model.load_state_dict(torch.load(pretrained_model, map_location='cpu'))

# Set the model in evaluation mode. In this case this is for the Dropout layers
model.eval()

# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
    """Return ``image`` shifted by ``epsilon`` along the sign of ``data_grad``.

    Args:
        image: Input tensor to perturb.
        epsilon: Step size applied to every element.
        data_grad: Gradient of the loss with respect to ``image``.

    Returns:
        The perturbed tensor, clamped to the ``[0, 1]`` range.
    """
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbed_image = image + epsilon * sign_data_grad
    # Adding clipping to maintain [0,1] range
    perturbed_image = torch.clamp(perturbed_image, 0, 1)
    return perturbed_image


def test(model, device, test_loader, epsilon):
    """Attack every correctly classified sample and report the accuracy.

    Each batch must hold exactly one sample because predictions and labels
    are read with ``.item()``. Samples the model already misclassifies are
    not attacked, but they still count in the accuracy denominator
    (``len(test_loader)``).

    Args:
        model: Classifier returning log-probabilities (fed to ``nll_loss``).
        device: Device the data and labels are moved to.
        test_loader: Iterable of ``(data, target)`` pairs that supports
            ``len()``.
        epsilon: Perturbation size passed to ``fgsm_attack``.

    Returns:
        ``(final_acc, adv_examples)`` where ``adv_examples`` holds at most
        five ``(initial_prediction, final_prediction, image_array)`` tuples.
    """
    # Accuracy counter
    correct = 0
    adv_examples = []

    # Loop over all examples in test set
    for data, target in test_loader:
        # Send the data and label to the device
        data, target = data.to(device), target.to(device)

        # The attack needs the gradient of the loss w.r.t. the input itself
        data.requires_grad = True

        # Forward pass the data through the model
        output = model(data)
        # Index of the max log-probability
        init_pred = output.max(1, keepdim=True)[1]

        # If the initial prediction is wrong, don't bother attacking
        if init_pred.item() != target.item():
            continue

        loss = F.nll_loss(output, target)
        model.zero_grad()
        loss.backward()
        data_grad = data.grad.data

        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image; no gradient is needed any more,
        # so skip building the autograd graph for this pass.
        with torch.no_grad():
            output = model(perturbed_data)
        final_pred = output.max(1, keepdim=True)[1]

        still_correct = final_pred.item() == target.item()
        if still_correct:
            correct += 1

        # Keep up to five samples for visualization: successful attacks,
        # plus (for epsilon == 0 only) the unperturbed correct samples.
        if (epsilon == 0 or not still_correct) and len(adv_examples) < 5:
            adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
            adv_examples.append((init_pred.item(), final_pred.item(), adv_ex))

    # Calculate final accuracy for this epsilon
    final_acc = correct / float(len(test_loader))
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc))

    # Return the accuracy and the saved adversarial examples
    return final_acc, adv_examples


# Despite its name this is an evaluation helper, not a unit test; the marker
# keeps pytest from collecting it when the name is imported into a test module.
test.__test__ = False

accuracies = []
examples = []

# Run test for each epsilon, collecting one accuracy value and one list of
# saved example images per epsilon (same order as `epsilons`).
for eps in epsilons:
    acc, ex = test(model, device, test_loader, eps)
    accuracies.append(acc)
    examples.append(ex)

# Figure for the accuracy-vs-epsilon curve drawn next.
plt.figure(figsize=(5,5))
# Accuracy as a function of the perturbation size, one marker per epsilon.
plt.plot(epsilons, accuracies, "*-")
plt.yticks(np.arange(0, 1.1, step=0.1))
plt.xticks(np.arange(0, .35, step=0.05))
plt.title("Accuracy vs Epsilon")
# Label the axes so the curve can be read without the surrounding text.
plt.xlabel("Epsilon")
plt.ylabel("Accuracy")
plt.show()

# Plot several examples of adversarial samples at each epsilon
cnt = 0
# Size the grid by the fullest row so every epsilon keeps its own row even
# when it produced fewer saved examples than the others (or none at all).
n_cols = max([len(row) for row in examples] + [0])
plt.figure(figsize=(8, 10))
for i, (eps, row) in enumerate(zip(epsilons, examples)):
    for j, (orig, adv, ex) in enumerate(row):
        # Subplot indices are 1-based and run row by row.
        cnt = i * n_cols + j + 1
        plt.subplot(len(epsilons), n_cols, cnt)
        plt.xticks([], [])
        plt.yticks([], [])
        if j == 0:
            # Only the first cell of a row carries the epsilon label.
            plt.ylabel("Eps: {}".format(eps), fontsize=14)
        plt.title("{} -> {}".format(orig, adv))
        plt.imshow(ex, cmap="gray")
plt.tight_layout()
# Without this call the grid is never displayed when run as a script.
plt.show()


(pytorch) λ python fsgm-gan.py
CUDA Available:  True
Epsilon: 0      Test Accuracy = 9810 / 10000 = 0.981
Epsilon: 0.05   Test Accuracy = 9426 / 10000 = 0.9426
Epsilon: 0.1    Test Accuracy = 8510 / 10000 = 0.851
Epsilon: 0.15   Test Accuracy = 6826 / 10000 = 0.6826
Epsilon: 0.2    Test Accuracy = 4301 / 10000 = 0.4301
Epsilon: 0.25   Test Accuracy = 2082 / 10000 = 0.2082
Epsilon: 0.3    Test Accuracy = 869 / 10000 = 0.0869


