












from __future__ import print_function

import os

import numpy as np
# NOTE(review): scipy.misc.imread is deprecated and absent from recent SciPy
# releases; this import only works with an old SciPy (plus Pillow) installed.
from scipy.misc import imread
from six.moves import cPickle as pickle
import platform


def load_pickle(f):
    """Unpickle one object from an open binary file on Python 2 or 3.

    On Python 3 the data is read with ``encoding='latin1'``, the setting the
    pickle documentation gives for reading Python 2 pickles that contain
    NumPy arrays.

    Inputs:
    - f: File object opened in binary mode.

    Returns: The unpickled object.

    Raises: ValueError if the interpreter major version is neither 2 nor 3.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file.

    Inputs:
    - filename: Path of a pickled batch; the pickle must be a dictionary with
      a 'data' array and a 'labels' sequence.

    Returns: A tuple (X, Y) where X is a float array of shape (N, 32, 32, 3)
    and Y is an array built from the labels.
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
    X = datadict['data']
    Y = datadict['labels']
    # -1 lets the batch size follow the data instead of assuming 10000 rows.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y


def load_CIFAR10(ROOT):
    """Load all of CIFAR-10 from the directory ROOT.

    Reads 'data_batch_1' ... 'data_batch_5' as training data and
    'test_batch' as test data.

    Returns: A tuple (Xtr, Ytr, Xte, Yte).
    """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b,))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True,
                     cifar10_dir='cs231n/datasets/cifar-10-batches-py'):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.

    Inputs:
    - num_training: Number of leading training images kept for training.
    - num_validation: Number of images, taken right after the training
      images, kept for validation.
    - num_test: Number of leading test images kept.
    - subtract_mean: Whether to subtract the mean training image from every
      split.
    - cifar10_dir: Directory holding the CIFAR-10 batch files.

    Returns: A dictionary with keys 'X_train', 'y_train', 'X_val', 'y_val',
    'X_test' and 'y_test'; image arrays are channels-first, (N, 3, 32, 32).
    """
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data (the validation split must be taken before X_train
    # is overwritten by the training split)
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }


def _read_image_chw(img_file):
    """Read one 64x64 image file and return it with the channel axis first."""
    img = imread(img_file)
    if img.ndim == 2:
        # Grayscale file: add a channel axis so the result is (1, 64, 64),
        # which broadcasts over the 3 channels when stored.
        img.shape = (64, 64, 1)
    return img.transpose(2, 0, 1)


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class. Only the wnids actually
    # present in this dataset are split into name lists; this also avoids
    # dict.iteritems(), which does not exist on Python 3.
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t', 1) for line in f)
    class_names = [[w.strip() for w in wnid_to_words[wnid].split(',')]
                   for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * np.ones(num_images,
                                                      dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            X_train_block[j] = _read_image_chw(img_file)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
    num_val = len(img_files)
    y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
    X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'val', 'images', img_file)
        X_val[i] = _read_image_chw(img_file)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        X_test[i] = _read_image_chw(img_file)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }


def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model
      files. Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    # Besides UnpicklingError, the pickle documentation lists these as errors
    # that unpickling arbitrary bytes may raise; all of them mean "this file
    # is not a pickle", so the file is skipped.
    unpickle_errors = (pickle.UnpicklingError, AttributeError, EOFError,
                       ImportError, IndexError)
    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                loaded = load_pickle(f)
            except unpickle_errors:
                continue
        models[model_file] = loaded['model']
    return models


from __future__ import print_function
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
# from past.builtins import xrange


def time_function(f, *args):
    """Call ``f(*args)`` once and return the elapsed wall-clock seconds.

    The return value of ``f`` is discarded.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic


# Default size of figures.
plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the raw CIFAR-10 data.
# NOTE(review): get_CIFAR10_data in data_utils uses
# 'cs231n/datasets/cifar-10-batches-py' ("datasets", plural) -- confirm which
# directory name actually exists on disk.
cifar10_dir = 'cs231n/dataset/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Sanity check: print the shapes of the loaded arrays.
print('Traing data shape: ', X_train.shape)
print('Traing labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)



# Show a grid of random training images: one column per class, one row per
# sample.
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    # All training indices whose label is y.
    idxs = np.flatnonzero(y_train == y)
    # Randomly pick samples_per_class of them, without repetition.
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Subplot numbers are 1-based and run row by row.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
# Render the grid now; otherwise later plt.* calls draw into the last subplot.
plt.show()



# Subsample: keep only the first num_training training examples and the
# first num_test test examples.
num_training = 5000
mask = list(range(num_training))
X_train, y_train = X_train[mask], y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test, y_test = X_test[mask], y_test[mask]

# Flatten every image into a single row vector.
X_train = X_train.reshape((X_train.shape[0], -1))
X_test = X_test.reshape((X_test.shape[0], -1))




class k_nearest_neighbor(object):
    """k-nearest-neighbour classifier using Euclidean (L2) distance."""

    def __init__(self):
        pass

    def train(self, X, y):
        """Store the training set; no other work is done at train time.

        Inputs:
        - X: Array of shape (num_train, D), one training point per row.
        - y: Array of shape (num_train,) with the label of each row of X.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k, numloops=1):
        """Predict a label for every row of X.

        Inputs:
        - X: Array of shape (num_test, D), one test point per row.
        - k: Number of nearest neighbours that vote for each label.
        - numloops: Which distance implementation to use: 0 (fully
          vectorised), 1 (one explicit loop) or 2 (two explicit loops).

        Returns: Array of shape (num_test,) with the predicted labels.

        Raises: ValueError if numloops is not 0, 1 or 2.
        """
        if numloops == 0:
            dists = self.compute_distance_no_loops(X)
        elif numloops == 1:
            dists = self.compute_distances_one_loop(X)
        elif numloops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError("Invalid value %r for numloops" % (numloops,))
        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """Compute L2 distances with a loop over test and training points.

        Returns: Array of shape (num_test, num_train) where entry [i, j] is
        the distance between test point i and training point j.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                diff = self.X_train[j, :] - X[i, :]
                dists[i][j] = np.sqrt(np.sum(np.square(diff)))
        return dists

    def compute_distances_one_loop(self, X):
        """Compute L2 distances with a single loop over the test points.

        Returns: Array of shape (num_test, num_train), same meaning as in
        compute_distances_two_loops.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts test point i from every training row.
            dists[i] = np.sqrt(
                np.sum(np.square(X[i, :] - self.X_train), axis=1))
        return dists

    def compute_distance_no_loops(self, X):
        """Compute L2 distances without any explicit Python loop.

        Uses the expansion ||x - t||^2 = ||x||^2 - 2 x.t + ||t||^2.

        Returns: Array of shape (num_test, num_train), same meaning as in
        compute_distances_two_loops.
        """
        cross = np.dot(X, self.X_train.T)
        # keepdims gives a column vector so it broadcasts across the columns.
        test_sq = np.sum(np.square(X), axis=1, keepdims=True)
        train_sq = np.sum(np.square(self.X_train), axis=1)
        sq_dists = test_sq - 2 * cross + train_sq
        # Round-off can leave tiny negative values where the true distance is
        # zero; clamp them so that sqrt does not produce NaN.
        return np.sqrt(np.maximum(sq_dists, 0))

    # Alias whose name matches the other compute_distances_* methods.
    compute_distances_no_loops = compute_distance_no_loops

    def predict_labels(self, dists, k):
        """Vote among the k nearest training points of every test point.

        Inputs:
        - dists: Array of shape (num_test, num_train) of distances.
        - k: Number of nearest neighbours that vote.

        Returns: Array of shape (num_test,) with the most frequent label
        among the k nearest neighbours; ties go to the smallest label. The
        training labels must be non-negative integers (np.bincount).
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            closest_y = self.y_train[np.argsort(dists[i, :])[:k]]
            y_pred[i] = np.argmax(np.bincount(closest_y))
        return y_pred


from cs231n.classifiers import k_nearest_neighbor
# Train the classifier on the subsampled data and compute the distances from
# every test point to every training point.
knn = k_nearest_neighbor()
knn.train(X_train, y_train)
dists = knn.compute_distances_two_loops(X_test)
# print(dists.shape)
# plt.imshow(dists, interpolation = 'none')
# plt.show()

# Predict with k = 10 and report the fraction of correctly labelled test
# points.
y_test_pred = knn.predict_labels(dists, k=10)
# print(y_test_pred)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))



# Cross-validation over k: split the training data into num_folds folds and,
# for every candidate k, use each fold once as the validation set.
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

# k_to_accuracies[k][fold] is the validation accuracy of k on that fold.
k_to_accuracies = {k: np.zeros(num_folds) for k in k_choices}
for fold in range(num_folds):
    X_validate_fold = X_train_folds[fold]
    y_validate_fold = y_train_folds[fold]
    # Train on every fold except the held-out one.
    temp_X = np.concatenate(X_train_folds[:fold] + X_train_folds[fold + 1:])
    temp_y = np.concatenate(y_train_folds[:fold] + y_train_folds[fold + 1:])
    knn.train(temp_X, temp_y)
    # The distances do not depend on k, so compute them once per fold.
    dists = knn.compute_distances_one_loop(X_validate_fold)
    for k in k_choices:
        y_fold_pred = knn.predict_labels(dists, k=k)
        num_correct = np.sum(y_fold_pred == y_validate_fold)
        # Normalise by the size of the validation fold, not by num_test.
        k_to_accuracies[k][fold] = float(num_correct) / len(y_validate_fold)

for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))


# Scatter the per-fold accuracies for every k.
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# Overlay the mean accuracy with error bars of one standard deviation.
# Iterating the dict itself yields only keys, so .items() is required to get
# the (k, accuracies) pairs.
sorted_items = sorted(k_to_accuracies.items())
sorted_ks = [k for k, _ in sorted_items]
accuracies_mean = np.array([np.mean(v) for _, v in sorted_items])
accuracies_std = np.array([np.std(v) for _, v in sorted_items])
plt.errorbar(sorted_ks, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()




CS231n Assignment1 Knn相关推荐

  1. CS231n assignment1 KNN部分用到的函数

    1. plt.rcParams 摘自:https://www.cnblogs.com/pacino12134/p/9776882.html 作用 设置matplotlib的配置参数,pyplot使用rc ...

  2. 【实验小结】cs231n assignment1 knn 部分

    1. 前言 这个是斯坦福 cs231n 课程的课程作业, 在做这个课程作业的过程中, 遇到了各种问题, 通过查阅资料加以解决, 加深了对课程内容的理解, 以及熟悉了相应的python 代码实现 工程地 ...

  3. cs231n assignment1 SVM

    上一篇:cs231n assignment1 knn 文章目录 SVM Inline Question SVM 支持向量机的损失函数为 Li=∑j!=yimax⁡(0,sj−syi+△)L_{i}=\ ...

  4. Win10下CS231n assignment1 环境配置

    CS231n assignment1 环境配置步骤 环境: Windows10 64bit 刚看完cs231n2017视频的前两节课,想做作业,于是在网上找配置windows10环境的教程.遇到一些问 ...

  5. cs231n assignment1 环境搭建+实践操作

    网易云课程视频及作业链接 http://study.163.com/course/courseMain.htm?courseId=1003223001 1. 环境搭建 根据我第一篇的文章成功进入了环境 ...

  6. cs231n assignment1 tips

    1. SVM求dW 这个问题我卡了很久,一直想不通这里到底怎么算的,在GitHub上看了好几个人的代码也硬是没搞懂,最后参考了课程论坛(https://www.reddit.com/r/cs231n/ ...

  7. 关于CIFAR-10图像分类总结

    关于CIFAR-10图像分类问题,网络上有很多资源,这里记录一些在我的学习过程中的资料,主要是关于深度卷积神经网络处理图像分类的,包括VGGNet,GoogLeNet以及ResNet,也记录了一些杂七 ...

  8. CS231n——指南向report1及assignment1 solution

    Lecture1 Course Introduction Lecture2 Image Classification 图像识别的困难 向量范数度量图片差异 验证集用于超参数调优及交叉验证 Assign ...

  9. CS231n课程笔记翻译系列之目录汇总

    知乎上CS231n课程翻译系列 翻译的笔记非常好,为了方便查看,这里把所有目录列于此,并给出链接. Python Numpy教程(全篇) Python 基本数据类型 容器(列表, 字典, 集合, 元组 ...

  10. 如何在本地完成CS231n课程作业

    最近开始学习斯坦福大学的CS231n课程,课程地址:网易云课堂,只有中文字幕,现在学完了1-7课时,准备着手做一下第一次作业,但是第一次接触不免有些手忙脚乱,自己探索了半天,准备写一个教程给和我一样的 ...


  1. 【NOIP2015】斗地主 题解
  2. (Mybatis)日志工厂
  3. 华硕老毛子(Padavan)——L2TP连接自动重连解决方案
  4. 5.5的performance_schema
  5. 用swing设计一个打地鼠小游戏_这7个风靡欧美的英语小游戏,学会胜过刷100道题!...
  6. Android热修复Tinker接入文档
  7. hdu 3966(树链剖分+线段树区间更新)
  8. php 中文字符串截取函数--比较好的
  9. python和lua哪个有前途_python和lua数据类型的比较
  10. 《javascript高级程序设计》读书笔记——作用域
  11. ARTS打卡计划第一周-Tips-ControllerAdvice的使用
  12. Github上优秀的开源小程序汇总
  13. 设置notepad++背景护眼色
  14. 【亲测有效】Visual Studio Installer 稍等片刻...正在提取文件 进度条卡住不动 0B每秒 一段时间后提示 循环下载安装文件 无法下载安装文件。请检查Internet 连接
  15. 字节跳动否认完成支付牌照收购,但金融野心一直有...
  16. canvas示例样式_使用js canvas和atari vcs trivia制作基本html游戏的快速示例
  17. 在苹果Mac电脑中如何将键盘当作鼠标使用?
  18. 微信公众号的开发:基于Java版本的服务器(1)
  19. 大数据项目离线数仓(全 )二(数仓系统)
  20. 【Linux】linux进程--进程控制:进程创建、进程终止、进程等待、进程程序替换


  1. php laravel 相关收集
  2. 一个小小的发现--音频也八卦
  3. 如何利用ping命令拥有最佳MTU?
  4. 复制当前地址到系统剪贴板
  5. 郁闷!我的Gmail邮箱的问题!
  6. 基于SSM实现旅游住宿和导游系统
  7. 2021最新SSM博客,功能完善,初云博客增强版
  8. 定时器和for循环数组_59.for循环遍历数组、集合(含代码)
  9. C程序设计--文件1--文件的分类+文件的打开和关闭
  10. hive根据已有表创建新表_读取Hive中所有表的表结构,并在新Hive库中创建表,索引等...