


# A Python layer: Caffe loads class `layer` from module `module` at net
# construction time.
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}

这里 type 只能取 Python 这一种;top、bottom 的用法与普通层相同;module 是你的 Python 模块名,一般就是文件名(不含 .py 后缀);layer 则是该模块中定义的层类的类名。


下面以论文《Fully Convolutional Networks for Semantic Segmentation》公布的代码为例,说明 Python 层应该怎么写。

import random

import caffe
import numpy as np
from PIL import Image


class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises:
            Exception: if the layer does not have exactly two tops, or has
                any bottom.
        """
        # config
        # NOTE(review): eval() executes whatever expression param_str holds.
        # The documented format is a `dict(...)` call, which
        # ast.literal_eval cannot parse, so eval is kept -- only load net
        # definitions you trust.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        # the context manager closes the split file once it has been read
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current (image, label) pair and size both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then advance the index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around at the end of the split
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer has no bottoms to propagate to."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        path = '{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx)
        # np.array() copies the pixels, so the file can be closed right away
        with Image.open(path) as im:
            in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        path = '{}/SegmentationClass/{}.png'.format(self.voc_dir, idx)
        with Image.open(path) as im:
            label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with
        SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises:
            Exception: if the layer does not have exactly two tops, or has
                any bottom.
        """
        # config
        # NOTE(review): eval() executes whatever expression param_str holds.
        # The documented format is a `dict(...)` call, which
        # ast.literal_eval cannot parse, so eval is kept -- only load net
        # definitions you trust.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
        # the context manager closes the split file once it has been read
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices) - 1)

    def reshape(self, bottom, top):
        """Load the current (image, label) pair and size both tops to fit."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then advance the index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices) - 1)
        else:
            self.idx += 1
            # wrap around at the end of the split
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: this layer has no bottoms to propagate to."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        path = '{}/img/{}.jpg'.format(self.sbdd_dir, idx)
        # np.array() copies the pixels, so the file can be closed right away
        with Image.open(path) as im:
            in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        # imported here, as in the original, so scipy is only needed for SBDD
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label



文件: pyloss.py

import caffe
import numpy as np


class EuclideanLossLayer(caffe.Layer):
    """
    Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
    to demonstrate the class interface for developing layers in Python.
    """

    def setup(self, bottom, top):
        """Validate the layer wiring.

        Raises:
            Exception: if the layer is not given exactly two bottoms.
        """
        # check input pair
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        """Allocate the difference buffer and shape the scalar loss output.

        Raises:
            Exception: if the two bottoms hold a different number of elements.
        """
        # check input dimensions match
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        # difference is shape of inputs
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        """Store bottom[0] - bottom[1] and write sum(diff^2) / num / 2."""
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        """Write +diff/num into bottom[0].diff and -diff/num into bottom[1].diff.

        A bottom whose propagate_down flag is false is left untouched.
        """
        # the gradient has opposite sign for the two inputs
        for i, sign in enumerate((1, -1)):
            if not propagate_down[i]:
                continue
            bottom[i].diff[...] = sign * self.diff / bottom[i].num


name: 'LinearRegressionExample'
# define a simple network for linear regression on dummy data
# that computes the loss by a PythonLayer.
layer {
  type: 'DummyData'
  name: 'x'
  top: 'x'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
layer {
  type: 'DummyData'
  name: 'y'
  top: 'y'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
# include InnerProduct layers for parameters
# so the net will need backward
layer {
  type: 'InnerProduct'
  name: 'ipx'
  top: 'ipx'
  bottom: 'x'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'InnerProduct'
  name: 'ipy'
  top: 'ipy'
  bottom: 'y'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer.
  # since PythonLayer inherits directly from Layer, this isn't automatically
  # known to Caffe
  loss_weight: 1
}


# imports
import json
import time
import pickle
import scipy.misc
import scipy.sparse
import skimage.io
import caffe

import numpy as np
import os.path as osp

from xml.dom import minidom
from random import shuffle
from threading import Thread
from PIL import Image

from tools import SimpleTransformer

# PASCAL VOC class names; position in the tuple is the class index.
_PASCAL_CLASSES = ('__background__',  # always index 0
                   'aeroplane', 'bicycle', 'bird', 'boat',
                   'bottle', 'bus', 'car', 'cat', 'chair',
                   'cow', 'diningtable', 'dog', 'horse',
                   'motorbike', 'person', 'pottedplant',
                   'sheep', 'sofa', 'train', 'tvmonitor')
_CLASS_TO_IND = {name: ind for ind, name in enumerate(_PASCAL_CLASSES)}
# Number of object classes, i.e. everything except the background (20).
_NUM_OBJECT_CLASSES = len(_PASCAL_CLASSES) - 1


class PascalMultilabelDataLayerSync(caffe.Layer):
    """
    This is a simple synchronous datalayer for training a multilabel model on
    PASCAL.
    """

    def setup(self, bottom, top):
        """Read the layer parameters, build the loader and shape both tops.

        Raises:
            AssertionError: if a required parameter is missing (raised by
                check_params).
        """
        self.top_names = ['data', 'label']

        # === Read input parameters ===

        # params is a python dictionary with layer parameters.
        # NOTE(review): eval() executes whatever expression param_str holds;
        # only load net definitions you trust.
        params = eval(self.param_str)

        # Check the parameters for validity.
        check_params(params)

        # store input as class variables
        self.batch_size = params['batch_size']

        # Create a batch loader to load the images.
        self.batch_loader = BatchLoader(params, None)

        # === reshape tops ===
        # since we use a fixed input image size, we can shape the data layer
        # once. Else, we'd have to do it in the reshape call.
        top[0].reshape(
            self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])
        # Note the 20 channels (because PASCAL has 20 classes.)
        top[1].reshape(self.batch_size, _NUM_OBJECT_CLASSES)

        print_info("PascalMultilabelDataLayerSync", params)

    def forward(self, bottom, top):
        """
        Load data.
        """
        for itt in range(self.batch_size):
            # Use the batch loader to load the next image.
            im, multilabel = self.batch_loader.load_next_image()

            # Add directly to the caffe data layer
            top[0].data[itt, ...] = im
            top[1].data[itt, ...] = multilabel

    def reshape(self, bottom, top):
        """
        There is no need to reshape the data, since the input is of fixed size
        (rows and columns)
        """
        pass

    def backward(self, top, propagate_down, bottom):
        """
        This layer does not back propagate
        """
        pass


class BatchLoader(object):
    """
    This class abstracts away the loading of images.
    Images can either be loaded singly, or in a batch. The latter is used for
    the asynchronous data layer to preload batches while other processing is
    performed.
    """

    def __init__(self, params, result):
        """Store the parameters and read the image index list of the split.

        Args:
            params: dict with 'batch_size', 'pascal_root', 'im_shape' and
                'split'.
            result: stored on the instance as-is; not used by this class.
        """
        self.result = result
        self.batch_size = params['batch_size']
        self.pascal_root = params['pascal_root']
        self.im_shape = params['im_shape']
        # get list of image indexes.
        list_file = params['split'] + '.txt'
        index_path = osp.join(self.pascal_root, 'ImageSets/Main', list_file)
        # the context manager closes the index file once it has been read
        with open(index_path) as f:
            self.indexlist = [line.rstrip('\n') for line in f]
        self._cur = 0  # current image
        # this class does some simple data-manipulations
        self.transformer = SimpleTransformer()

        print("BatchLoader initialized with {} images".format(
            len(self.indexlist)))

    def load_next_image(self):
        """
        Load the next image in a batch.

        Returns:
            (image, multilabel): the image as returned by
            SimpleTransformer.preprocess and a float32 vector with one entry
            per object class, set to 1 for every class present.
        """
        # Did we finish an epoch?
        if self._cur == len(self.indexlist):
            self._cur = 0
            shuffle(self.indexlist)

        # Load an image
        index = self.indexlist[self._cur]  # Get the image index
        image_file_name = index + '.jpg'
        image_path = osp.join(self.pascal_root, 'JPEGImages', image_file_name)
        # scipy.misc.imresize no longer exists in current SciPy, so resize
        # with PIL directly (bilinear was imresize's default). PIL expects
        # (width, height) whereas im_shape is (height, width).
        with Image.open(image_path) as img:
            # convert() guarantees the three channels the data top expects
            resized = img.convert('RGB').resize(
                (self.im_shape[1], self.im_shape[0]), Image.BILINEAR)
            im = np.asarray(resized)

        # do a simple horizontal flip as data augmentation
        flip = np.random.choice(2)*2-1
        im = im[:, ::flip, :]

        # Load and prepare ground truth
        multilabel = np.zeros(_NUM_OBJECT_CLASSES).astype(np.float32)
        anns = load_pascal_annotation(index, self.pascal_root)
        for label in anns['gt_classes']:
            # in the multilabel problem we don't care how MANY instances
            # there are of each class. Only if they are present.
            # The "-1" is b/c we are not interested in the background
            # class.
            multilabel[label - 1] = 1

        self._cur += 1
        return self.transformer.preprocess(im), multilabel


def load_pascal_annotation(index, pascal_root):
    """
    This code is borrowed from Ross Girshick's FAST-RCNN code
    (https://github.com/rbgirshick/fast-rcnn).
    It parses the PASCAL .xml metadata files.
    See publication for further details: (http://arxiv.org/abs/1504.08083).

    Thanks Ross!

    Args:
        index: image index; the file read is Annotations/<index>.xml.
        pascal_root: directory that contains the Annotations folder.

    Returns:
        dict with 'boxes' (num_objs x 4 uint16), 'gt_classes' (int32 class
        indices), 'gt_overlaps' (sparse num_objs x 21 matrix with a 1.0 at
        each object's class), 'flipped' (always False) and 'index'.
    """
    filename = osp.join(pascal_root, 'Annotations', index + '.xml')

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, len(_PASCAL_CLASSES)), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        cls = _CLASS_TO_IND[
            str(get_data_from_tag(obj, "name")).lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'index': index}


def check_params(params):
    """
    A utility function to check the parameters for the data layers.

    Raises:
        AssertionError: if 'split', 'batch_size', 'pascal_root' or
            'im_shape' is missing from params.
    """
    # Raised explicitly rather than via `assert` so the checks still run
    # under `python -O`; the exception type is unchanged for callers.
    if 'split' not in params:
        raise AssertionError(
            'Params must include split (train, val, or test).')

    required = ['batch_size', 'pascal_root', 'im_shape']
    for r in required:
        if r not in params:
            raise AssertionError('Params must include {}'.format(r))


def print_info(name, params):
    """
    Output some info regarding the class
    """
    print("{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
        name,
        params['split'],
        params['batch_size'],
        params['im_shape']))


from __future__ import print_function
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2


# helper function for common structures

def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    """Return a (Convolution, in-place ReLU) pair stacked on `bottom`."""
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)


def fc_relu(bottom, nout):
    """Return an (InnerProduct, in-place ReLU) pair stacked on `bottom`."""
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)


def max_pool(bottom, ks, stride=1):
    """Return a MAX Pooling layer with kernel size `ks` on `bottom`."""
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)


def caffenet(lmdb, batch_size=256, include_acc=False):
    """Build the CaffeNet definition reading from the LMDB at `lmdb`.

    Args:
        lmdb: source path handed to the Data layer.
        batch_size: batch size of the Data layer.
        include_acc: when true, an Accuracy layer on fc8 is added to the
            returned definition.

    Returns:
        The result of to_proto() for the loss (and accuracy, if requested).
    """
    data, label = L.Data(
        source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
        transform_param=dict(crop_size=227, mean_value=[104, 117, 123],
                             mirror=True))

    # the net itself
    conv1, relu1 = conv_relu(data, 11, 96, stride=4)
    pool1 = max_pool(relu1, 3, stride=2)
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
    pool2 = max_pool(relu2, 3, stride=2)
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
    conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
    conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
    pool5 = max_pool(relu5, 3, stride=2)
    fc6, relu6 = fc_relu(pool5, 4096)
    drop6 = L.Dropout(relu6, in_place=True)
    fc7, relu7 = fc_relu(drop6, 4096)
    drop7 = L.Dropout(relu7, in_place=True)
    fc8 = L.InnerProduct(drop7, num_output=1000)
    loss = L.SoftmaxWithLoss(fc8, label)

    if include_acc:
        acc = L.Accuracy(fc8, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)


def make_net():
    """Write train.prototxt and test.prototxt to the current directory."""
    with open('train.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-train-lmdb'), file=f)

    with open('test.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-val-lmdb', batch_size=50,
                       include_acc=True), file=f)


if __name__ == '__main__':
    make_net()


import numpy as np


class SimpleTransformer:
    """
    SimpleTransformer is a simple class for preprocessing and deprocessing
    images for caffe.
    """

    def __init__(self, mean=(128, 128, 128)):
        """Store the per-channel mean as float32 and a scale of 1.0."""
        self.mean = np.array(mean, dtype=np.float32)
        self.scale = 1.0

    def set_mean(self, mean):
        """
        Set the mean to subtract for centering the data.
        """
        # stored as float32, exactly as __init__ does
        self.mean = np.array(mean, dtype=np.float32)

    def set_scale(self, scale):
        """
        Set the data scaling.
        """
        self.scale = scale

    def preprocess(self, im):
        """
        preprocess() emulate the pre-processing occurring in the vgg16 caffe
        prototxt.

        Takes a height x width x channel image and returns a new float32
        channel x height x width array: channel order reversed, mean
        subtracted, then multiplied by the scale. The input is not modified.
        """
        # np.array always copies, so the in-place arithmetic below can never
        # write into the caller's array
        im = np.array(im, dtype=np.float32)
        im = im[:, :, ::-1]  # change to BGR
        im -= self.mean
        im *= self.scale
        im = im.transpose((2, 0, 1))

        return im

    def deprocess(self, im):
        """
        inverse of preprocess()

        Takes a channel x height x width array and returns a new uint8
        height x width x channel image. The input is not modified.
        """
        # out-of-place arithmetic: transpose() returns a view, so in-place
        # operators here would silently change the caller's array
        im = np.asarray(im).transpose(1, 2, 0) / self.scale + self.mean
        im = im[:, :, ::-1]  # change to RGB

        return np.uint8(im)


class CaffeSolver:
    """
    Caffesolver is a class for creating a solver.prototxt file. It sets default
    values and can export a solver parameter file.
    Note that all parameters are stored as strings. Strings variables are
    stored as strings in strings.
    """

    def __init__(self, testnet_prototxt_path="testnet.prototxt",
                 trainnet_prototxt_path="trainnet.prototxt", debug=False):
        """Fill self.sp with the default solver parameters.

        Args:
            testnet_prototxt_path: value written (quoted) as test_net.
            trainnet_prototxt_path: value written (quoted) as train_net.
            debug: when true, max_iter, test_iter, test_interval and display
                are overridden with small values.
        """
        self.sp = {}

        # critical:
        self.sp['base_lr'] = '0.001'
        self.sp['momentum'] = '0.9'

        # speed:
        self.sp['test_iter'] = '100'
        self.sp['test_interval'] = '250'

        # looks:
        self.sp['display'] = '25'
        self.sp['snapshot'] = '2500'
        self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!

        # learning rate policy
        self.sp['lr_policy'] = '"fixed"'

        # important, but rare:
        self.sp['gamma'] = '0.1'
        self.sp['weight_decay'] = '0.0005'
        self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
        self.sp['test_net'] = '"' + testnet_prototxt_path + '"'

        # pretty much never change these.
        self.sp['max_iter'] = '100000'
        self.sp['test_initialization'] = 'false'
        self.sp['average_loss'] = '25'  # this has to do with the display.
        self.sp['iter_size'] = '1'  # this is for accumulating gradients

        if (debug):
            self.sp['max_iter'] = '12'
            self.sp['test_iter'] = '1'
            self.sp['test_interval'] = '4'
            self.sp['display'] = '1'

    def add_from_file(self, filepath):
        """
        Reads a caffe solver prototxt file and updates the Caffesolver
        instance parameters.

        Blank lines and lines whose first non-blank character is '#' are
        skipped. Every other line must look like `key: value`; only the first
        ':' separates key from value, so values may themselves contain ':'.

        Raises:
            ValueError: if a non-blank, non-comment line contains no ':'.
        """
        with open(filepath, 'r') as f:
            for line in f:
                entry = line.strip()
                if not entry or entry.startswith('#'):
                    continue
                key, sep, value = entry.partition(':')
                if not sep:
                    raise ValueError(
                        'Cannot parse solver line (expected "key: value"): '
                        '%r' % entry)
                self.sp[key.strip()] = value.strip()

    def write(self, filepath):
        """
        Export solver parameters to INPUT "filepath". Sorted alphabetically.

        Raises:
            TypeError: if any parameter value is not a string.
        """
        items = sorted(self.sp.items())
        # validate before opening so a bad value cannot leave a truncated,
        # half-written solver file behind
        for key, value in items:
            if not isinstance(value, str):
                raise TypeError('All solver parameters must be strings')
        with open(filepath, 'w') as f:
            f.writelines('%s: %s\n' % (key, value) for key, value in items)

