第三卷 第十一章 车辆识别



斯坦福汽车数据集包含 196 类汽车、共 16185 张图像,但数据集中存在极端的类别不平衡:其中一些车辆制造商和型号的样本数量严重偏多(例如,奥迪和宝马各有超过 1000 个数据点,而特斯拉只有 77 个示例)。








# Configuration module for the Stanford Cars fine-tuning pipeline. Every
# other script in this project reads these constants as `config.<NAME>`.

# import the necessary packages
from os import path

# define the base path to the cars dataset
BASE_PATH = "/raid/datasets/cars"

# based on the base path, derive the images path and meta file path
IMAGES_PATH = path.sep.join([BASE_PATH, "car_ims"])
LABELS_PATH = path.sep.join([BASE_PATH, "complete_dataset.csv"])

# define the root directory that holds the mxnet list and record files
# NOTE(review): MX_OUTPUT was referenced below but never defined, so
# importing this module raised a NameError; it is assumed to live under
# BASE_PATH -- confirm against your directory layout
MX_OUTPUT = BASE_PATH

# define the path to the output training, validation, and testing
# lists
TRAIN_MX_LIST = path.sep.join([MX_OUTPUT, "lists/train.lst"])
VAL_MX_LIST = path.sep.join([MX_OUTPUT, "lists/val.lst"])
TEST_MX_LIST = path.sep.join([MX_OUTPUT, "lists/test.lst"])

# define the path to the output training, validation, and testing
# image records
TRAIN_MX_REC = path.sep.join([MX_OUTPUT, "rec/train.rec"])
VAL_MX_REC = path.sep.join([MX_OUTPUT, "rec/val.rec"])
TEST_MX_REC = path.sep.join([MX_OUTPUT, "rec/test.rec"])

# define the path to the label encoder
LABEL_ENCODER_PATH = path.sep.join([BASE_PATH, "output/le.cpickle"])

# define the RGB means from the ImageNet dataset
R_MEAN = 123.68
G_MEAN = 116.779
B_MEAN = 103.939

# define the percentage of validation and testing images relative
# to the number of training images
# NOTE(review): NUM_VAL_IMAGES is read by the dataset builder but was
# missing here; it is assumed to mirror NUM_TEST_IMAGES -- confirm
NUM_VAL_IMAGES = 0.15
NUM_TEST_IMAGES = 0.15

# define the number of class labels; this must equal the number of
# distinct "make:model" labels produced by the label encoder
# NOTE(review): value assumed, not shown in this file -- verify against
# len(le.classes_) after building the dataset
NUM_CLASSES = 164

# define the batch size and the number of devices used for training
# NOTE(review): both values are assumptions (the original file was cut
# off right after the "define the batch size" comment) -- tune them for
# your hardware
BATCH_SIZE = 32
NUM_DEVICES = 1

创建build_dataset.py,负责构建训练、验证和测试 .lst 文件。

# Build the training, validation, and testing .lst files for the cars
# dataset and serialize the fitted label encoder to disk.

# import the necessary packages
import car_config as config
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import progressbar
import pickle
import os

# read the contents of the labels file (skipping the header row), then
# initialize the list of image paths and labels
print("[INFO] loading image paths and labels...")
with open(config.LABELS_PATH) as f:
    rows = f.read()
rows = rows.strip().split("\n")[1:]
trainPaths = []
trainLabels = []

# loop over the rows
for row in rows:
    # unpack the row, then update the image paths and labels list; the
    # class label is the combined "make:model" string (to classify by
    # make only, unpack just the first two columns and use `make`)
    (filename, make, model) = row.split(",")[:3]

    # keep only the file name, dropping any directory prefix
    filename = filename[filename.rfind("/") + 1:]
    trainPaths.append(os.sep.join([config.IMAGES_PATH, filename]))
    trainLabels.append("{}:{}".format(make, model))

# now that we have the total number of images in the dataset that
# can be used for training, compute the number of images that
# should be used for validation and testing
numVal = int(len(trainPaths) * config.NUM_VAL_IMAGES)
numTest = int(len(trainPaths) * config.NUM_TEST_IMAGES)

# our class labels are represented as strings so we need to encode
# them
print("[INFO] encoding labels...")
le = LabelEncoder().fit(trainLabels)
trainLabels = le.transform(trainLabels)

# perform stratified sampling from the training set to construct a
# validation set
print("[INFO] constructing validation data...")
split = train_test_split(trainPaths, trainLabels, test_size=numVal,
    stratify=trainLabels)
(trainPaths, valPaths, trainLabels, valLabels) = split

# perform stratified sampling from the training set to construct a
# testing set
print("[INFO] constructing testing data...")
split = train_test_split(trainPaths, trainLabels, test_size=numTest,
    stratify=trainLabels)
(trainPaths, testPaths, trainLabels, testLabels) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output list
# files
datasets = [
    ("train", trainPaths, trainLabels, config.TRAIN_MX_LIST),
    ("val", valPaths, valLabels, config.VAL_MX_LIST),
    ("test", testPaths, testLabels, config.TEST_MX_LIST)]

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    print("[INFO] building {}...".format(outputPath))

    # initialize the progress bar
    widgets = ["Building List: ", progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=len(paths),
        widgets=widgets).start()

    # open the output file for writing; the context manager guarantees
    # the file is closed even if writing a row fails
    with open(outputPath, "w") as f:
        # loop over each of the individual images + labels
        for (i, (path, label)) in enumerate(zip(paths, labels)):
            # write the image index, label, and image path to file as
            # one tab-separated row
            row = "\t".join([str(i), str(label), path])
            f.write("{}\n".format(row))
            pbar.update(i)

    pbar.finish()

# write the label encoder to file so the evaluation scripts can map
# integer predictions back to "make:model" strings (previously the file
# was opened but the encoder was never written nor the handle closed)
print("[INFO] serializing label encoder...")
with open(config.LABEL_ENCODER_PATH, "wb") as f:
    f.write(pickle.dumps(le))

下面是生成 train.rec 文件的命令,其输出路径必须与配置文件中的 TRAIN_MX_REC 完全一致:

生成 test.rec数据集:


        3、在斯坦福汽车数据集上微调 VGG

首先需要下载VGG16 的预训练权重。
# Fine-tune a pre-trained VGG16 network on the cars dataset, either from
# the original pre-trained weights or from a previously saved checkpoint.

# import the necessary packages
import mxnet as mx
import car_config as config
import argparse
import logging
import os

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--vgg", required=False,
    help="path to pre-trained VGGNet for fine-tuning",
    default="D:/Project/deeplearn/预训练模型/vgg16_zoo/vgg16")
ap.add_argument("-c", "--checkpoints", required=False,
    help="path to output checkpoint directory", default="checkpoints")
ap.add_argument("-p", "--prefix", required=False,
    help="name of model prefix", default="vggnet")
ap.add_argument("-s", "--start-epoch", type=int, default=0,
    help="epoch to restart training at")
args = vars(ap.parse_args())

# set the logging level and output file
logging.basicConfig(level=logging.DEBUG,
    filename="training_{}.log".format(args["start_epoch"]),
    filemode="w")

# determine the batch size across all devices
batchSize = config.BATCH_SIZE * config.NUM_DEVICES

# construct the training image iterator (with data augmentation)
trainIter = mx.io.ImageRecordIter(
    path_imgrec=config.TRAIN_MX_REC,
    data_shape=(3, 224, 224),
    batch_size=batchSize,
    rand_crop=True,
    rand_mirror=True,
    rotate=15,
    max_shear_ratio=0.1,
    mean_r=config.R_MEAN,
    mean_g=config.G_MEAN,
    mean_b=config.B_MEAN,
    preprocess_threads=config.NUM_DEVICES * 2)

# construct the validation image iterator (no augmentation)
valIter = mx.io.ImageRecordIter(
    path_imgrec=config.VAL_MX_REC,
    data_shape=(3, 224, 224),
    batch_size=batchSize,
    mean_r=config.R_MEAN,
    mean_g=config.G_MEAN,
    mean_b=config.B_MEAN)

# initialize the optimizer and the training contexts
opt = mx.optimizer.SGD(learning_rate=1e-4, momentum=0.9, wd=0.0005,
    rescale_grad=1.0 / batchSize)
# NOTE(review): the GPU index is hard-coded to device 3 while the batch
# size is scaled by config.NUM_DEVICES -- adjust to match your machine
ctx = [mx.gpu(3)]

# construct the checkpoints path, initialize the model argument and
# auxiliary parameters, and whether uninitialized parameters should
# be allowed
checkpointsPath = os.path.sep.join([args["checkpoints"],
    args["prefix"]])
argParams = None
auxParams = None
allowMissing = False

# if there is no specific model starting epoch supplied, then we
# need to build the network architecture
if args["start_epoch"] <= 0:
    # load the pre-trained VGG16 model
    print("[INFO] loading pre-trained model...")
    (symbol, argParams, auxParams) = mx.model.load_checkpoint(
        args["vgg"], 0)

    # the new FC layer has no pre-trained weights, so missing
    # parameters must be allowed when training starts
    allowMissing = True

    # grab the layers from the pre-trained model, then find the
    # dropout layer *prior* to the final FC layer (i.e., the layer
    # that contains the number of class labels)
    # HINT: you can find layer names like this:
    #     for layer in layers:
    #         print(layer.name)
    # then, append the string "_output" to the layer name
    layers = symbol.get_internals()
    net = layers["drop7_output"]

    # construct a new FC layer using the desired number of output
    # class labels, followed by a softmax output
    net = mx.sym.FullyConnected(data=net,
        num_hidden=config.NUM_CLASSES, name="fc8")
    net = mx.sym.SoftmaxOutput(data=net, name="softmax")

    # construct a new set of network arguments, removing any previous
    # arguments pertaining to FC8 (this will allow us to train the
    # final layer)
    argParams = {k: argParams[k] for k in argParams if "fc8" not in k}

# otherwise, a specific checkpoint was supplied
else:
    # load the checkpoint from disk
    print("[INFO] loading epoch {}...".format(args["start_epoch"]))
    (net, argParams, auxParams) = mx.model.load_checkpoint(
        checkpointsPath, args["start_epoch"])

# initialize the callbacks and evaluation metrics
batchEndCBs = [mx.callback.Speedometer(batchSize, 50)]
epochEndCBs = [mx.callback.do_checkpoint(checkpointsPath)]
metrics = [mx.metric.Accuracy(), mx.metric.TopKAccuracy(top_k=5),
    mx.metric.CrossEntropy()]

# construct the model and train it
print("[INFO] training network...")
model = mx.mod.Module(symbol=net, context=ctx)

# previously the script stopped after building the module, so the
# optimizer, iterators, metrics, and callbacks above were never used
# NOTE(review): the total number of epochs is not given anywhere in
# this file; 65 is an assumed budget -- tune it together with the
# learning rate
model.fit(
    trainIter,
    eval_data=valIter,
    num_epoch=65,
    begin_epoch=args["start_epoch"],
    initializer=mx.initializer.Xavier(),
    arg_params=argParams,
    aux_params=auxParams,
    optimizer=opt,
    allow_missing=allowMissing,
    eval_metric=metrics,
    batch_end_callback=batchEndCBs,
    epoch_end_callback=epochEndCBs)

在小型数据集(如斯坦福汽车)上微调大型网络(如 VGG16)时,过度拟合是不可避免的。即使应用了数据增强,网络中的参数仍然太多,而训练示例太少。因此,选取合适的初始学习率非常重要——这比从零开始训练网络时更为重要。请花点时间微调网络,并尝试各种初始学习率,这样才最有可能在微调过程中获得最高的准确率。



# Evaluate a fine-tuned checkpoint on the testing record file and report
# the rank-1 and rank-5 accuracies.

# import the necessary packages
import car_config as config
from customize.tools.ranked import rank5_accuracy
import mxnet as mx
import argparse
import pickle
import os

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--checkpoints", required=True,
    help="path to output checkpoint directory")
ap.add_argument("-p", "--prefix", required=True,
    help="name of model prefix")
ap.add_argument("-e", "--epoch", type=int, required=True,
    help="epoch # to load")
args = vars(ap.parse_args())

# load the label encoder, closing the file handle once it is read
# NOTE: pickle can execute arbitrary code on load -- only load encoder
# files that you generated yourself
with open(config.LABEL_ENCODER_PATH, "rb") as f:
    le = pickle.loads(f.read())

# construct the testing image iterator
testIter = mx.io.ImageRecordIter(
    path_imgrec=config.TEST_MX_REC,
    data_shape=(3, 224, 224),
    batch_size=config.BATCH_SIZE,
    mean_r=config.R_MEAN,
    mean_g=config.G_MEAN,
    mean_b=config.B_MEAN)

# load our pre-trained model
print("[INFO] loading pre-trained model...")
checkpointsPath = os.path.sep.join([args["checkpoints"],
    args["prefix"]])
(symbol, argParams, auxParams) = mx.model.load_checkpoint(
    checkpointsPath, args["epoch"])

# construct the model
model = mx.mod.Module(symbol=symbol, context=[mx.gpu(0)])
model.bind(data_shapes=testIter.provide_data,
    label_shapes=testIter.provide_label)
model.set_params(argParams, auxParams)

# initialize the list of predictions and targets
print("[INFO] evaluating model...")
predictions = []
targets = []

# loop over the predictions in batches
for (preds, _, batch) in model.iter_predict(testIter):
    # convert the batch of predictions and labels to NumPy
    # arrays
    preds = preds[0].asnumpy()
    labels = batch.label[0].asnumpy().astype("int")

    # update the predictions and targets lists, respectively
    predictions.extend(preds)
    targets.extend(labels)

# apply array slicing to the targets since mxnet will return the
# next full batch size rather than the *actual* number of labels
targets = targets[:len(predictions)]

# compute the rank-1 and rank-5 accuracies
(rank1, rank5) = rank5_accuracy(predictions, targets)
print("[INFO] rank-1: {:.2f}%".format(rank1 * 100))
print("[INFO] rank-5: {:.2f}%".format(rank5 * 100))

结果表明,我们能够在测试集上获得 84.22% 的 rank-1 和 96.54% 的 rank-5 准确率。



# Sample images from the testing list, classify each one with the
# fine-tuned network, and display the top prediction on the image along
# with the top-5 predictions in the terminal.

# due to mxnet seg-fault issue, need to place OpenCV import at the
# top of the file
import cv2

# import the necessary packages
import car_config as config
from customize.tools.imagetoarraypreprocessor import ImageToArrayPreprocessor
from customize.tools.aspectawarepreprocessor import AspectAwarePreprocessor
from customize.tools.meanpreprocessor import MeanPreprocessor
import numpy as np
import mxnet as mx
import argparse
import pickle
import imutils
import os

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--checkpoints", required=True,
    help="path to the checkpoint directory")
ap.add_argument("-p", "--prefix", required=True,
    help="name of model prefix")
ap.add_argument("-e", "--epoch", type=int, required=True,
    help="epoch # to load")
# the help text was previously a copy of the --epoch description
ap.add_argument("-s", "--sample-size", type=int, default=10,
    help="# of testing images to sample")
args = vars(ap.parse_args())

# load the label encoder, followed by the testing dataset file,
# then sample the testing set
# NOTE: pickle can execute arbitrary code on load -- only load encoder
# files that you generated yourself
with open(config.LABEL_ENCODER_PATH, "rb") as f:
    le = pickle.loads(f.read())
with open(config.TEST_MX_LIST) as f:
    rows = f.read().strip().split("\n")
rows = np.random.choice(rows, size=args["sample_size"])

# load our pre-trained model
print("[INFO] loading pre-trained model...")
checkpointsPath = os.path.sep.join([args["checkpoints"],
    args["prefix"]])
model = mx.model.FeedForward.load(checkpointsPath, args["epoch"])

# compile the model
model = mx.model.FeedForward(
    ctx=[mx.gpu(0)],
    symbol=model.symbol,
    arg_params=model.arg_params,
    aux_params=model.aux_params)

# initialize the image pre-processors
sp = AspectAwarePreprocessor(width=224, height=224)
mp = MeanPreprocessor(config.R_MEAN, config.G_MEAN, config.B_MEAN)
iap = ImageToArrayPreprocessor(dataFormat="channels_first")

# loop over the testing images
for row in rows:
    # grab the target class label and the image path from the row
    # (each row is "index<TAB>label<TAB>path")
    (target, imagePath) = row.split("\t")[1:]
    target = int(target)

    # load the image from disk; cv2.imread returns None rather than
    # raising when the file is missing or unreadable, so skip such rows
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] could not read image: {}".format(imagePath))
        continue

    # keep a resized copy for display, then pre-process the image by
    # resizing it and applying the pre-processors
    orig = image.copy()
    orig = imutils.resize(orig, width=min(500, orig.shape[1]))
    image = iap.preprocess(mp.preprocess(sp.preprocess(image)))
    image = np.expand_dims(image, axis=0)

    # classify the image and grab the indexes of the top-5 predictions
    preds = model.predict(image)[0]
    idxs = np.argsort(preds)[::-1][:5]

    # show the true class label; the encoder is given a one-element
    # list because scikit-learn rejects scalar input to
    # inverse_transform
    print("[INFO] actual={}".format(le.inverse_transform([target])[0]))

    # format and display the top predicted class label
    label = le.inverse_transform([idxs[0]])[0]
    label = label.replace(":", " ")
    label = "{}: {:.2f}%".format(label, preds[idxs[0]] * 100)
    cv2.putText(orig, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
        0.6, (0, 255, 0), 2)

    # loop over the top-5 predictions and display them
    for i in idxs:
        print("\t[INFO] predicted={}, probability={:.2f}%".format(
            le.inverse_transform([i])[0], preds[i] * 100))

    # show the image
    cv2.imshow("Image", orig)
    cv2.waitKey(0)

我们微调的 VGG16 网络能够以超过 84% 的 rank-1 准确率和超过 95% 的 rank-5 准确率正确识别车辆的品牌和型号。

