
ALI跟BiGAN的设计基本一模一样,只是没有加Latent regressor(虽然ALI中也简要地谈到了这个Latent regressor)。

并且根据ALI中的模型(G、E、D的架构)更容易实现,条理更加清晰,模型的结构设计也很容易实现。



  1. ALI虽然提到了Latent regressor,但是并没有使用(只是说可以作为一种正则化、提高精度的额外方法);BiGAN则在这个regressor上花了较多的笔墨。

  2. ALI结构更加清晰,并且各个模块训练所对应的损失也很清晰;BiGAN虽然在文字上大致描述了原因,但是描述得不够直观清晰,而且GAN训练本来就存在大量的坑,稍微'合理'地修改某个小细节,就会导致训练不出结果。

  3. ALI在对E的解释上做得比较好(E可以理解为另外一种G),这样看来G和E都是用来fool D的,所以也是一种对抗,较为直观。并且ALI的数学分析部分和GAN承接得更好,写得更加清晰。


两者虽然都谈到了Latent regressor,但是ALI把更多笔墨放在模型结构的设计上(但是画图不行);BiGAN虽然更侧重于Latent regressor,但是结构图画得相当不错。可以说是非常戏剧性了。



虽然谈到了latent regressor,但是算法中并没有交代使用。




后来就常用 ALI/BiGAN来表示这个模型。



实验相比于BiGAN没有使用latent regressor,但是效果居然也还行。



import os
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from model import Generator, Discriminator, Encoder
import torchvision
import itertools
import matplotlib.pyplot as plt
import torchvision.utils as vutils
import numpy as np

# Training script for the ALI/BiGAN model on MNIST.
#
# The discriminator D scores joint pairs: (x, E(x)) is treated as "real" and
# (G(z), z) as "fake".  G and E share one optimizer and are trained to swap
# those labels.  The latent-regression term ||z - E(G(z))||^2 is computed for
# logging only; it is not part of the optimised loss (see the commented
# `lam * Latent_regress` hint below).

if __name__ == '__main__':
    LR = 0.0002
    EPOCH = 100  # 50
    BATCH_SIZE = 100
    N_IDEAS = 128  # size of the latent code z
    lam = 1  # weight of the (currently disabled) latent-regression term
    TRAINED = False  # True: resume from G.pkl / D.pkl / E.pkl in the working dir

    mnist_root = '../Conditional-GAN/mnist/'
    # Download only when the dataset directory is missing or empty.
    DOWNLOAD_MNIST = not os.path.exists(mnist_root) or not os.listdir(mnist_root)

    train_data = torchvision.datasets.MNIST(
        root=mnist_root,
        train=True,  # this is training data
        # Converts a PIL.Image or numpy.ndarray to a torch.FloatTensor of
        # shape (C x H x W) normalised to the range [0.0, 1.0].
        transform=torchvision.transforms.ToTensor(),
        download=DOWNLOAD_MNIST,
    )
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    torch.cuda.empty_cache()
    if TRAINED:
        # NOTE(review): these checkpoints are whole pickled modules; only load
        # files you produced yourself.  Recent PyTorch releases may need
        # `weights_only=False` here - confirm against the installed version.
        G = torch.load('G.pkl').cuda()
        D = torch.load('D.pkl').cuda()
        E = torch.load('E.pkl').cuda()
    else:
        G = Generator(N_IDEAS).cuda()
        D = Discriminator().cuda()
        E = Encoder(input_size=1, out_size=N_IDEAS).cuda()

    optimizerG_E = torch.optim.Adam(itertools.chain(G.parameters(), E.parameters()), lr=LR)
    optimizerD = torch.optim.Adam(D.parameters(), lr=LR)
    l_c = nn.MSELoss()

    num_batches = len(train_loader)
    for epoch in range(EPOCH):
        tmpD, tmpG_E, tmpE = 0.0, 0.0, 0.0
        for step, (x, _) in enumerate(train_loader):
            x = x.cuda()
            z = torch.randn((x.shape[0], N_IDEAS, 1, 1)).cuda()

            G_z = G(z)  # decoded sample paired with its prior code z
            E_x = E(x)  # inferred code paired with its data sample x

            # ---- discriminator step ------------------------------------
            # G_z / E_x are detached so this backward pass touches D only.
            D_G_z = torch.mean(D(G_z.detach(), z))  # fake
            D_E_x = torch.mean(D(x, E_x.detach()))  # real
            # NOTE(review): as in the original, the log is taken of the
            # batch-mean discriminator output rather than averaged per sample.
            D_loss = -(torch.log(D_E_x) + torch.log(1 - D_G_z))
            optimizerD.zero_grad()
            D_loss.backward()
            optimizerD.step()

            # ---- generator / encoder step ------------------------------
            # D is re-evaluated after its update.  Back-propagating the
            # pre-update graph (the old `retain_graph=True` pattern) fails on
            # current autograd because optimizerD.step() modified D's weights
            # in place.
            D_G_z = torch.mean(D(G_z, z))
            D_E_x = torch.mean(D(x, E_x))
            Latent_regress = l_c(z, E(G_z))  # logged only
            G_E_loss = -(torch.log(1 - D_E_x) + torch.log(D_G_z))  # + lam * Latent_regress
            optimizerG_E.zero_grad()
            G_E_loss.backward()
            optimizerG_E.step()

            tmpD += D_loss.item()
            tmpG_E += G_E_loss.item()
            tmpE += Latent_regress.item()

        tmpD /= num_batches
        tmpG_E /= num_batches
        tmpE /= num_batches
        print(
            'epoch %d avg of loss: D: %.6f, G_E: %.6f, latent: %.6f' % (epoch, tmpD, tmpG_E, tmpE)
        )

        if epoch % 2 == 0:
            # Reconstruct the last batch of the epoch and save reconstructions
            # followed by the originals in one grid.
            with torch.no_grad():
                G_imgs = G(E(x)).cpu()
            fig = plt.figure(figsize=(10, 10))
            plt.axis("off")
            grid = vutils.make_grid(
                torch.cat([G_imgs, x.cpu().detach()]),
                nrow=10, padding=0, normalize=True, scale_each=True,
            )
            plt.imshow(np.transpose(grid.numpy(), (1, 2, 0)))
            # Named by epoch: `step` has the same value at the end of every
            # epoch, so using it overwrote a single file each time.
            plt.savefig('E_%d_.png' % epoch)
            plt.close(fig)  # avoid accumulating open figures over the run

    # NOTE(review): the save calls were truncated in the source; they are
    # reconstructed here as whole-module saves after training - confirm the
    # intended placement (per snapshot vs. end of run).
    torch.save(G, 'G.pkl')
    torch.save(D, 'D.pkl')
    torch.save(E, 'E.pkl')

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
from torch.utils.data import DataLoader


class Generator(nn.Module):
    """Decoder G(z): turn a latent code into a single-channel image.

    Four ConvTranspose2d + BatchNorm2d + LeakyReLU(0.1) stages upsample a
    ``(N, input_size, 1, 1)`` code to 4 -> 7 -> 14 -> 28 pixels per side, then
    two 1x1 convolutions reduce 32 channels to 1 and a sigmoid bounds the
    output to (0, 1).

    Args:
        input_size: number of channels of the latent code.
    """

    def __init__(self, input_size):
        super().__init__()
        strides = [1, 2, 2, 2]
        padding = [0, 1, 1, 1]
        channels = [input_size,
                    256, 128, 64, 32]  # original note: 1 means one-dimensional
        kernels = [4, 3, 4, 4]

        layers = []
        for i, (stride, kernel, pad) in enumerate(zip(strides, kernels, padding)):
            layers += [
                nn.ConvTranspose2d(
                    in_channels=channels[i],
                    out_channels=channels[i + 1],
                    stride=stride,
                    kernel_size=kernel,
                    padding=pad,
                ),
                nn.BatchNorm2d(channels[i + 1]),
                nn.LeakyReLU(.1),
            ]
        self.Conv_T = nn.Sequential(*layers)

        self.Conv = nn.Sequential(
            nn.Conv2d(kernel_size=1, stride=1, in_channels=channels[-1], out_channels=channels[-1]),
            nn.BatchNorm2d(channels[-1]),
            nn.LeakyReLU(.1),
            nn.Conv2d(kernel_size=1, stride=1, in_channels=channels[-1], out_channels=1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the image generated from latent code ``x``."""
        x = self.Conv_T(x)
        x = self.Conv(x)
        return x


class Encoder(nn.Module):
    """Encoder E(x): map an image to a latent code.

    Six Conv2d layers; every layer except the last is followed by
    BatchNorm2d + ReLU, so the returned code is unbounded.  For a 28x28 input
    the spatial size goes 14 -> 7 -> 3 -> 1 -> 1 -> 1, giving
    ``(N, out_size, 1, 1)``.

    Args:
        input_size: number of image channels.
        out_size: number of channels of the latent code.
    """

    def __init__(self, input_size=1, out_size=128):
        super().__init__()
        strides = [2, 2, 2, 1, 1, 1]
        padding = [1, 1, 1, 0, 0, 0]
        channels = [input_size, 32, 64, 128, 256, out_size, out_size]  # original note: 1 means one-dimensional
        kernels = [4, 4, 4, 3, 1, 1]

        last = len(strides) - 1
        layers = []
        for i, (stride, kernel, pad) in enumerate(zip(strides, kernels, padding)):
            layers.append(
                nn.Conv2d(
                    in_channels=channels[i],
                    out_channels=channels[i + 1],
                    stride=stride,
                    kernel_size=kernel,
                    padding=pad,
                )
            )
            if i != last:  # the final layer stays linear
                layers.append(nn.BatchNorm2d(channels[i + 1]))
                layers.append(nn.ReLU())
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Return the latent code inferred for image batch ``x``."""
        x = self.main(x)
        return x


class Discriminator(nn.Module):
    """Joint discriminator D(x, z) over (image, latent code) pairs.

    ``D_x`` embeds the image into 128 channels, ``D_z`` embeds the code into
    256 channels, and ``D_x_z`` scores their channel-wise concatenation with a
    sigmoid output.  The concatenation needs both embeddings to share a
    spatial size: three unpadded 4x4 / stride-2 convolutions take a 28x28
    image to 13 -> 5 -> 1, which matches a ``(N, z_in, 1, 1)`` code.

    Args:
        x_in: number of image channels.
        z_in: number of channels of the latent code.
    """

    def __init__(self, x_in=1, z_in=128):
        super().__init__()
        self.D_x = nn.Sequential(
            nn.Conv2d(in_channels=x_in, out_channels=32, kernel_size=4, stride=2),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),
            nn.BatchNorm2d(64),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2),
            nn.BatchNorm2d(128),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),
        )
        self.D_z = nn.Sequential(
            nn.Conv2d(in_channels=z_in, out_channels=256, kernel_size=1, stride=1),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),
        )
        self.D_x_z = nn.Sequential(
            nn.Conv2d(in_channels=256 + 128, out_channels=512, kernel_size=1, stride=1),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1),
            nn.Dropout2d(.2),
            nn.LeakyReLU(.1),

            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1),
            nn.Dropout2d(.2),
            nn.Sigmoid(),
        )

    def forward(self, x, z):
        """Return the sigmoid score of the pair ``(x, z)``, shape ``(N, 1, 1, 1)`` for 28x28 images."""
        x = self.D_x(x)
        z = self.D_z(z)
        # Join image and code embeddings along the channel axis (128 + 256).
        cat_x_z = torch.cat([x, z], dim=1)
        return self.D_x_z(cat_x_z)


if __name__ == '__main__':
    # Shape smoke test: G output, E(G(z)) and D(G(z), z).
    N_IDEAS = 128
    G = Generator(N_IDEAS, )
    rand_noise = torch.randn((10, N_IDEAS, 1, 1))
    print(G(rand_noise).shape)
    E = Encoder(input_size=1, out_size=N_IDEAS)
    print(E(G(rand_noise)).shape)
    D = Discriminator()
    print(D(G(rand_noise), rand_noise).shape)

import numpy as np
import torch
import matplotlib.pyplot as plt
from model import Generator, Discriminator
import torchvision.utils as vutils
import os
import torchvision
from torch.utils.data import Dataset, DataLoader

# Reconstruction check for a trained ALI/BiGAN model: encode MNIST digits with
# E, decode them with G, and save grids that alternate a row of
# reconstructions with the row of originals they came from.

if __name__ == '__main__':
    BATCH_SIZE = 10  # one grid row per batch (nrow=10 below)
    BATCHES_PER_FIGURE = 6
    TIME = 10  # number of figures to write

    mnist_root = '../Conditional-GAN/mnist/'
    # Download only when the dataset directory is missing or empty.
    DOWNLOAD_MNIST = not os.path.exists(mnist_root) or not os.listdir(mnist_root)

    train_data = torchvision.datasets.MNIST(
        root=mnist_root,
        train=True,  # this is training data
        # Converts a PIL.Image or numpy.ndarray to a torch.FloatTensor of
        # shape (C x H x W) normalised to the range [0.0, 1.0].
        transform=torchvision.transforms.ToTensor(),
        download=DOWNLOAD_MNIST,
    )
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    # NOTE(review): whole pickled modules - only load checkpoints you produced
    # yourself.  Recent PyTorch releases may need `weights_only=False` here.
    G = torch.load("G.pkl").cuda()
    E = torch.load('E.pkl').cuda()
    # NOTE(review): the modules are left in training mode, as in the original,
    # so BatchNorm uses per-batch statistics - confirm that this is intended.

    for t in range(TIME):
        tmp = []
        for step, (x, _) in enumerate(train_loader):
            x = x.cuda()
            with torch.no_grad():
                G_imgs = G(E(x)).cpu()
            # Reconstructions first, then their originals: with nrow equal to
            # the batch size each batch fills two consecutive grid rows.
            tmp.append(torch.cat([G_imgs, x.cpu().detach()]))
            if step == BATCHES_PER_FIGURE - 1:
                break

        fig = plt.figure(figsize=(10, 10))
        plt.axis("off")
        grid = vutils.make_grid(
            torch.cat(tmp), nrow=10, padding=0, normalize=True, scale_each=True,
        )
        plt.imshow(np.transpose(grid.numpy(), (1, 2, 0)))
        plt.savefig('E_%d.png' % t)
        plt.close(fig)  # avoid accumulating open figures


