前几天把HABI哈希图像检索工具包更新到V2.0版本后,小白菜又重新回头来用Python搞BoW词袋模型,一方面主要是练练Python,另一方面也是为了CBIR群开讲的关于图像检索群活动第二期而准备的一些素材。关于BoW,网上一堆资料讲得挺好挺全的了,小白菜自己也曾留下过一篇讲解BoW词袋构建过程的博文《Bag of Words模型》,所以这里主要讲讲BoW的实战。不过在实战前,小白菜还想再结合自己这两年多对BoW的思考和沉淀,重新以更直白的方式对BoW做一下总结。









#python findFeatures.py -t dataset/train/

import argparse as ap

import cv2

import numpy as np

import os

from sklearn.externals import joblib

from scipy.cluster.vq import *

from sklearn import preprocessing

from rootsift import RootSIFT

import math

# Build a bag-of-visual-words index for every image in the training folder:
# extract SIFT descriptors, cluster them into a vocabulary with k-means,
# turn each image into a tf-idf weighted, L2-normalised word histogram and
# store everything in "bof.pkl" for the search script.

# Get the path of the training set
parser = ap.ArgumentParser()
# `required` must be a real boolean; the string "True" only worked because
# any non-empty string is truthy.
parser.add_argument("-t", "--trainingSet", help="Path to Training Set", required=True)
args = vars(parser.parse_args())

# Every directory entry of the training folder is treated as one image
train_path = args["trainingSet"]
training_names = os.listdir(train_path)

# Requested size of the visual vocabulary (number of k-means centroids)
numWords = 1000

# Full path of every training image, in the same order as training_names
image_paths = [os.path.join(train_path, training_name) for training_name in training_names]

# Create feature extraction and keypoint detector objects
# NOTE(review): these factory functions look like the OpenCV 2.4-style API;
# confirm that the installed cv2 still provides them.
fea_det = cv2.FeatureDetector_create("SIFT")
des_ext = cv2.DescriptorExtractor_create("SIFT")

# One (image_path, descriptors) pair per image. `descriptors` is None when the
# file could not be decoded or no descriptor was produced, so that the rows of
# the index stay aligned with image_paths.
des_list = []
for i, image_path in enumerate(image_paths):
    im = cv2.imread(image_path)
    print("Extract SIFT of %s image, %d of %d images" % (training_names[i], i, len(image_paths)))
    des = None
    if im is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        print("Warning: cannot read %s, it will get an empty histogram" % image_path)
    else:
        kpts = fea_det.detect(im)
        kpts, des = des_ext.compute(im, kpts)
        # rootsift (optional, disabled)
        #rs = RootSIFT()
        #des = rs.compute(kpts, des)
    des_list.append((image_path, des))

# Stack all the descriptors vertically in a numpy array.
# A single vstack over the whole list avoids re-copying the growing array on
# every iteration. (To subsample before clustering, slice each array with
# [::step, :] here.)
usable_descriptors = [des for _, des in des_list if des is not None and len(des) > 0]
if not usable_descriptors:
    parser.error("no SIFT descriptors could be extracted from %s" % train_path)
descriptors = np.vstack(usable_descriptors)

# Perform k-means clustering (a single k-means run, as in the original call)
print("Start k-means: %d words, %d key points" % (numWords, descriptors.shape[0]))
voc, variance = kmeans(descriptors, numWords, 1)

# Calculate the histogram of features: row i counts how many descriptors of
# image i were assigned to each visual word
im_features = np.zeros((len(image_paths), numWords), "float32")
for i, (_, des) in enumerate(des_list):
    if des is None or len(des) == 0:
        # No descriptors for this image: leave its histogram at zero
        continue
    words, distance = vq(des, voc)
    # minlength pads the counts to numWords columns even when the highest
    # word ids never occur in this image
    im_features[i] = np.bincount(words, minlength=numWords)

# Perform Tf-Idf vectorization
# nbr_occurences[w] = number of images that contain word w at least once
nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
# Smoothed idf: the +1 in numerator and denominator avoids division by zero
idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32')

# Weight the histograms by idf, then perform L2 normalization
im_features = im_features*idf
im_features = preprocessing.normalize(im_features, norm='l2')

# Persist everything the search script needs to score a query
joblib.dump((im_features, image_paths, idf, numWords, voc), "bof.pkl", compress=3)


python findFeatures.py -t dataset/train/



#python search.py -i dataset/train/ukbench00000.jpg

import argparse as ap

import cv2

import imutils

import numpy as np

import os

from sklearn.externals import joblib

from scipy.cluster.vq import *

from sklearn import preprocessing

import numpy as np

from pylab import *

from PIL import Image

from rootsift import RootSIFT

# Rank the indexed images against one query image, using the vocabulary,
# idf weights and image histograms stored in "bof.pkl".

# Get the path of the query image from the command line
parser = ap.ArgumentParser()
# `required` must be a real boolean; the string "True" only worked because
# any non-empty string is truthy.
parser.add_argument("-i", "--image", help="Path to query image", required=True)
args = vars(parser.parse_args())

# Get query image path
image_path = args["image"]

# Load the indexed image features, their paths, the idf weights, the number of
# visual words and the vocabulary.
# NOTE: bof.pkl is a pickle-based file; only load one you produced yourself.
im_features, image_paths, idf, numWords, voc = joblib.load("bof.pkl")

# Create feature extraction and keypoint detector objects
# NOTE(review): these factory functions look like the OpenCV 2.4-style API;
# confirm that the installed cv2 still provides them.
fea_det = cv2.FeatureDetector_create("SIFT")
des_ext = cv2.DescriptorExtractor_create("SIFT")

# Extract the SIFT descriptors of the query image
im = cv2.imread(image_path)
if im is None:
    # cv2.imread signals an unreadable / non-image file by returning None
    parser.error("cannot read query image: %s" % image_path)
kpts = fea_det.detect(im)
kpts, des = des_ext.compute(im, kpts)

# rootsift (optional, disabled)
#rs = RootSIFT()
#des = rs.compute(kpts, des)

if des is None or len(des) == 0:
    parser.error("no SIFT descriptors found in query image: %s" % image_path)
descriptors = des

# Histogram of visual words for the query: assign every descriptor to a
# vocabulary entry and count the assignments per word
test_features = np.zeros((1, numWords), "float32")
words, distance = vq(descriptors, voc)
# minlength pads the counts to numWords columns even when the highest word
# ids never occur in the query
test_features[0] = np.bincount(words, minlength=numWords)

# Perform Tf-Idf vectorization and L2 normalization
test_features = test_features*idf
test_features = preprocessing.normalize(test_features, norm='l2')

# Both sides are L2-normalised, so the dot product is the cosine similarity;
# sorting the negated scores ranks the indexed images best match first
score = np.dot(test_features, im_features.T)
rank_ID = np.argsort(-score)

# Visualize the results
# NOTE(review): this copy of the script only opens the 16 best matches; the
# calls that actually display them appear to be missing (pylab is imported
# but never used here) -- confirm against the original post.
for i, ID in enumerate(rank_ID[0][0:16]):
    img = Image.open(image_paths[ID])







#python search.py -i dataset/train/ukbench00000.jpg(查询图像的路径)



from: http://yongyuan.name/blog/practical-BoW-for-image-retrieval-with-python.html

