Written on 2021-04-02.

1 Data preparation
For data preparation, see the blog post "insightface数据制作全过程记录" (a full walkthrough of building the insightface dataset):
https://blog.csdn.net/CLOUD_J/article/details/98769515

2 Eval verification
The script is verification.py under /recognition/common:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import argparse
import sys
import numpy as np
from scipy import misc
from sklearn.model_selection import KFold
from scipy import interpolate
import sklearn
import cv2
import math
import datetime
import pickle
from sklearn.decomposition import PCA
import mxnet as mx
from mxnet import ndarray as nd
from recognition.ArcFace.verification import dumpR


class LFold:
    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if self.n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        if self.n_splits > 1:
            return self.k_fold.split(indices)
        else:
            return [(indices, indices)]


def calculate_roc(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  nrof_folds=10,
                  pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)
    #print('pca', pca)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        #print('train_set', train_set)
        #print('test_set', test_set)
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            #print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            #print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(
                threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        #print('threshold', thresholds[best_threshold_index])
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
                threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(
            thresholds[best_threshold_index], dist[test_set],
            actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(
        np.logical_and(np.logical_not(predict_issame),
                       np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc


def calculate_val(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  far_target,
                  nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(
                threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(
            threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(
        np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    #print(true_accept, false_accept)
    #print(n_same, n_diff)
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds,
                                       embeddings1,
                                       embeddings2,
                                       np.asarray(actual_issame),
                                       nrof_folds=nrof_folds,
                                       pca=pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds,
                                      embeddings1,
                                      embeddings2,
                                      np.asarray(actual_issame),
                                      1e-3,
                                      nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far


def load_bin(path, image_size):
    try:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f)  #py2
    except UnicodeDecodeError as e:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f, encoding='bytes')  #py3
    data_list = []
    for flip in [0, 1]:
        data = nd.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        if img.shape[1] != image_size[0]:
            img = mx.image.resize_short(img, image_size[0])
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][i][:] = img
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return (data_list, issame_list)


def test(data_set,
         mx_model,
         batch_size,
         nfolds=10,
         data_extra=None,
         label_shape=None):
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    model = mx_model
    embeddings_list = []
    if data_extra is not None:
        _data_extra = nd.array(data_extra)
    time_consumed = 0.0
    if label_shape is None:
        _label = nd.ones((batch_size, ))
    else:
        _label = nd.ones(label_shape)
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            #print(_data.shape, _label.shape)
            time0 = datetime.datetime.now()
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data, ), label=(_label, ))
            else:
                db = mx.io.DataBatch(data=(_data, _data_extra),
                                     label=(_label, ))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            #_arg, _aux = model.get_params()
            #__arg = {}
            #for k,v in _arg.iteritems():
            #  __arg[k] = v.as_in_context(_ctx)
            #_arg = __arg
            #_arg["data"] = _data.as_in_context(_ctx)
            #_arg["softmax_label"] = _label.as_in_context(_ctx)
            #for k,v in _arg.iteritems():
            #  print(k,v.context)
            #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
            #exe.forward(is_train=False)
            #net_out = exe.outputs
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            #print(_embeddings.shape)
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            #print(_em.shape, _norm)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    acc1 = 0.0
    std1 = 0.0
    #_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
    #acc1, std1 = np.mean(accuracy), np.std(accuracy)
    #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
    #embeddings = np.concatenate(embeddings_list, axis=1)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings,
                                                 issame_list,
                                                 nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='do verification')
    # general
    parser.add_argument('--data-dir', default=r'G:\data\face1\faces_emore', help='')
    # parser.add_argument('--model', default='../maskinsightface/VarGFaceNet/model,1', help='path to load model.')
    parser.add_argument('--model',
                        default='../maskinsightface/VarGFaceNet/model,1',
                        help='path to load model.')
    parser.add_argument('--target',
                        default='calfw,cfp_fp,agedb_30',
                        help='test targets.')
    parser.add_argument('--gpu', default=0, type=int, help='gpu id')
    parser.add_argument('--batch-size', default=32, type=int, help='')
    parser.add_argument('--max', default='', type=str, help='')
    parser.add_argument('--mode', default=0, type=int, help='')
    parser.add_argument('--nfolds', default=10, type=int, help='')
    args = parser.parse_args()

    #sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
    #import face_image
    #prop = face_image.load_property(args.data_dir)
    #image_size = prop.image_size
    image_size = [112, 112]
    print('image_size', image_size)
    ctx = mx.gpu(args.gpu)
    nets = []
    model = '../mobilefacenet612M/mxnet/new_model,0'
    model = '../maskinsightface/res2-6-10-2-dim256/model,1'
    model = '../zwnet443.3M/mxnet/zwnwet_model,0'
    vec = model.split(',')
    prefix = model.split(',')[0]
    epochs = []
    if len(vec) == 1:
        pdir = os.path.dirname(prefix)
        for fname in os.listdir(pdir):
            if not fname.endswith('.params'):
                continue
            _file = os.path.join(pdir, fname)
            if _file.startswith(prefix):
                epoch = int(fname.split('.')[0].split('-')[1])
                epochs.append(epoch)
        epochs = sorted(epochs, reverse=True)
        if len(args.max) > 0:
            _max = [int(x) for x in args.max.split(',')]
            assert len(_max) == 2
            if len(epochs) > _max[1]:
                epochs = epochs[_max[0]:_max[1]]
    else:
        epochs = [int(x) for x in vec[1].split('|')]
    print('model number', len(epochs))
    time0 = datetime.datetime.now()
    for epoch in epochs:
        print('loading', prefix, epoch)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        #arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
        model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        nets.append(model)
    time_now = datetime.datetime.now()
    diff = time_now - time0
    print('model loading time', diff.total_seconds())

    ver_list = []
    ver_name_list = []
    for name in args.target.split(','):
        path = os.path.join(args.data_dir, name + ".bin")
        if os.path.exists(path):
            print('loading.. ', name)
            data_set = load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)

    if args.mode == 0:
        for i in range(len(ver_list)):
            results = []
            for model in nets:
                acc1, std1, acc2, std2, xnorm, embeddings_list = test(
                    ver_list[i], model, args.batch_size, args.nfolds)
                print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
                print('[%s]Accuracy: %1.5f+-%1.5f' %
                      (ver_name_list[i], acc1, std1))
                print('[%s]Accuracy-Flip: %1.5f+-%1.5f' %
                      (ver_name_list[i], acc2, std2))
                results.append(acc2)
            print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
    elif args.mode == 1:
        model = nets[0]
        test_badcase(ver_list[0], model, args.batch_size, args.target)
    else:
        model = nets[0]
        dumpR(ver_list[0], model, args.batch_size, args.target)

A problem came up at run time. We did not shuffle the pairs when preparing the dataset, so the positive pairs are bunched together, and some K-fold slices end up with zero negative pairs. FAR divides the falsely accepted negatives by the total number of negatives, so that denominator becomes 0 (and symmetrically n_same can be 0 for VAL). calculate_val_far was changed accordingly:

def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(
        np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    #print(true_accept, false_accept)
    #print(n_same, n_diff)
    if n_same == 0:
        val = 1
    else:
        val = float(true_accept) / float(n_same)
    if n_diff == 0:
        far = 0
    else:
        far = float(false_accept) / float(n_diff)
    return val, far

3 Code walkthrough
3.1 Main function
The main block first loads the model checkpoints and the .bin dataset files, then runs the test on each loaded model in turn to compare how the models do on each dataset.
The core is here: the outer loop walks the datasets in ver_list, the inner loop the models in nets.

if args.mode == 0:
    for i in range(len(ver_list)):
        results = []
        for model in nets:
            acc1, std1, acc2, std2, xnorm, embeddings_list = test(
                ver_list[i], model, args.batch_size, args.nfolds)
            print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
            print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
            print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
            results.append(acc2)
        print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
elif args.mode == 1:
    model = nets[0]
    test_badcase(ver_list[0], model, args.batch_size, args.target)
else:
    model = nets[0]
    dumpR(ver_list[0], model, args.batch_size, args.target)

A few more notes.

--model default='../../models/model-r50-am-lfw/model,50'

This argument is the model path prefix plus a training epoch: ../../models/model-r50-am-lfw is the directory, model is the checkpoint name, and the trailing 50 is the epoch.
Training typically saves checkpoints for several epochs, so this lets you evaluate the parameters from whichever epoch you want, as sketched below.
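
A minimal sketch of how that string is parsed, mirroring the split(',') logic in the script's __main__ (the path is illustrative):

model_arg = '../../models/model-r50-am-lfw/model,50'  # illustrative path
vec = model_arg.split(',')
prefix = vec[0]                                # checkpoint prefix: .../model
epochs = [int(x) for x in vec[1].split('|')]   # '50' -> [50]; '50|60' -> [50, 60]
print(prefix, epochs)
# mx.model.load_checkpoint(prefix, 50) then reads model-symbol.json and model-0050.params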

3.2 The test function
The test function first runs forward passes to get the output features, then computes their norms, and finally computes accuracy.

The forward pass happens here. The loop runs over the arrays in data_list (load_bin produces two of them, the original images and their horizontally flipped copies, so even a single .bin yields two passes). data is the image array; ba and bb are just arbitrarily named batch-start and batch-end indices. Each step slices off one batch, calls model.forward(db, is_train=False), and stores the outputs into embeddings; each finished array is appended to embeddings_list.

for i in range(len(data_list)):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba < data.shape[0]:
        bb = min(ba + batch_size, data.shape[0])
        count = bb - ba
        # Always slice a full batch ending at bb; for the final short batch this
        # overlaps the previous one, and the overlap is discarded below.
        _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
        #print(_data.shape, _label.shape)
        time0 = datetime.datetime.now()
        if data_extra is None:
            db = mx.io.DataBatch(data=(_data,), label=(_label,))
        else:
            db = mx.io.DataBatch(data=(_data, _data_extra), label=(_label,))
        model.forward(db, is_train=False)
        net_out = model.get_outputs()  # fetch the network output
        _embeddings = net_out[0].asnumpy()
        time_now = datetime.datetime.now()
        diff = time_now - time0
        time_consumed += diff.total_seconds()
        #print(_embeddings.shape)
        if embeddings is None:
            # first batch: allocate the output array
            embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
        # keep only the last `count` rows, skipping the overlapped prefix
        embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
        ba = bb
    embeddings_list.append(embeddings)

Step two computes norms: the average L2 norm over every feature vector, reported as XNorm.

_xnorm = 0.0
_xnorm_cnt = 0
for embed in embeddings_list:
    for i in range(embed.shape[0]):
        _em = embed[i]
        _norm = np.linalg.norm(_em)
        #print(_em.shape, _norm)
        _xnorm += _norm
        _xnorm_cnt += 1
_xnorm /= _xnorm_cnt
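
For reference, the same average can be computed in one vectorized expression (an equivalent sketch, not the original code):

_xnorm = np.mean(np.concatenate(
    [np.linalg.norm(embed, axis=1) for embed in embeddings_list]))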

Step three computes accuracy.
The features, the is-same label list, and nrof_folds are passed in; nrof_folds drives K-fold evaluation, i.e. the pairs are split into K folds and scored fold by fold.

_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
acc2, std2 = np.mean(accuracy), np.std(accuracy)
One thing to note: the line embeddings = embeddings_list[0] + embeddings_list[1]. embeddings_list[0] holds the features of the original images and embeddings_list[1] those of the horizontally flipped copies (see load_bin), so this sums the two feature sets before re-normalizing; that is what the "Accuracy-Flip" figure is computed from.
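
A minimal numpy sketch of that flip fusion (array names and shapes are made up):

import numpy as np
from sklearn.preprocessing import normalize

emb_orig = np.random.randn(12, 128)   # stand-in for embeddings_list[0]
emb_flip = np.random.randn(12, 128)   # stand-in for embeddings_list[1]

fused = normalize(emb_orig + emb_flip)  # sum, then L2-normalize each row
# after normalization the sum acts like an average of the two feature directions
print(fused.shape, np.linalg.norm(fused, axis=1)[:3])  # rows have unit norm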

3.3 The evaluate function
First the pairs are split in two. The embeddings are stored interleaved, like
A1 A2 A3 A4 B1 B2
where A1 and A2 are the two images of pair 1 (same class). Slicing regroups them:
A1 A3 B1 (even indices: the first image of every pair)
A2 A4 B2 (odd indices: the second image of every pair)

In Python, x[a::b] slices starting at index a and stepping by b.
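
A quick demo of that stride slicing:

import numpy as np
x = np.arange(10)
print(x[0::2])  # [0 2 4 6 8] -> first image of each pair
print(x[1::2])  # [1 3 5 7 9] -> second image of each pair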

It also builds a thresholds array; the evaluation functions sweep it to find the best threshold.
Two evaluations are then run:
calculate_roc
calculate_val

def evaluate(embeddings, actual_issame, nrof_folds=10, pca = 0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2,
        np.asarray(actual_issame), nrof_folds=nrof_folds, pca = pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2,
        np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far
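
As a quick sanity check, the ROC path can be run on random unit embeddings (a sketch assuming calculate_roc and its helpers from this verification.py are importable; random features should score around 0.5):

import numpy as np
import sklearn.preprocessing

rng = np.random.RandomState(0)
emb = sklearn.preprocessing.normalize(rng.randn(600, 128))  # 300 interleaved pairs
issame = np.asarray([bool(rng.randint(2)) for _ in range(300)])

thresholds = np.arange(0, 4, 0.01)
tpr, fpr, accuracy = calculate_roc(thresholds, emb[0::2], emb[1::2], issame, nrof_folds=10)
print('accuracy %.4f +- %.4f' % (accuracy.mean(), accuracy.std()))
# a trained model scores far higher than this random baseline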

3.4 calculate_roc
Step one sets up the K-fold splitter.
1. assert raises immediately if the condition that follows is false;
2. LFold is the class declared at the top of the file, a thin wrapper over sklearn's KFold;
3. nrof_pairs and nrof_thresholds then size the result arrays below.

assert (embeddings1.shape[0] == embeddings2.shape[0])
assert (embeddings1.shape[1] == embeddings2.shape[1])
nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
nrof_thresholds = len(thresholds)
k_fold = LFold(n_splits=nrof_folds, shuffle=False)

tprs = np.zeros((nrof_folds, nrof_thresholds))
fprs = np.zeros((nrof_folds, nrof_thresholds))
accuracy = np.zeros((nrof_folds))
indices = np.arange(nrof_pairs)

Step two computes the pairwise distance: the squared Euclidean distance between the two embeddings of each pair (a sum of squared differences; no square root is taken).

if pca == 0:
    diff = np.subtract(embeddings1, embeddings2)  # element-wise difference
    dist = np.sum(np.square(diff), 1)             # squared L2 distance per pair

Step three sweeps thresholds for the best one.
k_fold.split(indices) yields the train/test index splits: the train split is used to pick the best threshold, the test split to score it. tprs and fprs are filled in for the ROC but not used further here; the headline number is accuracy.

for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
    #print('train_set', train_set)
    #print('test_set', test_set)
    if pca > 0:
        print('doing pca on', fold_idx)
        embed1_train = embeddings1[train_set]
        embed2_train = embeddings2[train_set]
        _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
        #print(_embed_train.shape)
        pca_model = PCA(n_components=pca)
        pca_model.fit(_embed_train)
        embed1 = pca_model.transform(embeddings1)
        embed2 = pca_model.transform(embeddings2)
        embed1 = sklearn.preprocessing.normalize(embed1)
        embed2 = sklearn.preprocessing.normalize(embed2)
        #print(embed1.shape, embed2.shape)
        diff = np.subtract(embed1, embed2)
        dist = np.sum(np.square(diff), 1)

    # Find the best threshold for the fold
    acc_train = np.zeros((nrof_thresholds))
    for threshold_idx, threshold in enumerate(thresholds):  # sweep for the best threshold
        _, _, acc_train[threshold_idx] = calculate_accuracy(
            threshold, dist[train_set], actual_issame[train_set])
    best_threshold_index = np.argmax(acc_train)
    #print('threshold', thresholds[best_threshold_index])
    for threshold_idx, threshold in enumerate(thresholds):
        tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
            threshold, dist[test_set], actual_issame[test_set])
    _, _, accuracy[fold_idx] = calculate_accuracy(
        thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
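
A toy run of the per-fold threshold sweep on synthetic distances (assuming calculate_accuracy from this file; the numbers are made up):

import numpy as np

rng = np.random.RandomState(0)
dist = np.concatenate([rng.uniform(0.2, 1.0, 50),    # same-person pairs: small distance
                       rng.uniform(1.8, 3.5, 50)])   # different-person pairs: large distance
issame = np.concatenate([np.ones(50, bool), np.zeros(50, bool)])

thresholds = np.arange(0, 4, 0.01)
accs = [calculate_accuracy(t, dist, issame)[2] for t in thresholds]
best = thresholds[int(np.argmax(accs))]
print('best threshold %.2f, accuracy %.3f' % (best, max(accs)))  # threshold lands between 1.0 and 1.8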

3.5 Accuracy calculation
This is the core metric function; our fix from section 2 sits in its sibling, calculate_val_far.

A few things to note:

np.less(dist, threshold) is an element-wise comparison, not a minimum: each distance is compared against the threshold, giving True where the distance is smaller (predicted "same person").
np.logical_and takes the element-wise AND of two boolean arrays; ANDing the predictions with the ground truth gives tp, the correctly predicted positives (true positives).
np.logical_not(actual_issame) negates the ground-truth labels.
fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) is therefore the predictions ANDed with the negated labels: pairs predicted "same" that are actually different (false positives).
tn and fn follow the same pattern.

def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
  
    tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn)
    fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn)
    acc = float(tp+tn)/dist.size
    return tpr, fpr, acc
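
A tiny worked example of these masks (values chosen by hand):

import numpy as np

dist = np.array([0.5, 1.2, 2.5, 0.8])          # squared distances
actual = np.array([True, True, False, False])   # ground-truth same/different
pred = np.less(dist, 1.39)                      # -> [True, True, False, True]

tp = np.sum(np.logical_and(pred, actual))                                  # 2
fp = np.sum(np.logical_and(pred, np.logical_not(actual)))                  # 1 (the 0.8 diff-pair)
tn = np.sum(np.logical_and(np.logical_not(pred), np.logical_not(actual)))  # 1
fn = np.sum(np.logical_and(np.logical_not(pred), actual))                  # 0
print(tp, fp, tn, fn)  # 2 1 1 0 -> acc = 3/4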

In the final output, the Accuracy line prints 0: in test() the evaluate call on the unflipped embeddings alone is commented out, so acc1 and std1 stay 0.0. Accuracy-Flip is computed from embeddings_list[0] + embeddings_list[1] (original plus flipped features), so the 0.99675 under Accuracy-Flip is the number to read.

The best threshold printed earlier in the run was 1.39.
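
Since the features are L2-normalized before evaluation, squared distance and cosine similarity are linked by dist = ‖e1 − e2‖² = 2 − 2·cosθ, so a distance threshold of 1.39 corresponds to a cosine-similarity cutoff of roughly (2 − 1.39) / 2 ≈ 0.305.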

4 Results

Model        LFW               CFP-FP
resnet-r50   99.63% (99.80%)   92.66% (92.74%)
resnet-r100  99.81% (99.77%)   95.94% (98.27%)

Note: figures in parentheses are the GitHub author's results; the figures before them are mine, measured with batch size 16.

The gap is still an open question; pointers from anyone more experienced are welcome.

5 Problems
1. Out-of-memory errors

Consider reducing the batch size:

python3 verification.py --data-dir ../../datasets/lfw2/ --model ../../models/model-r100-ii/model,0 --nfolds 10 --batch-size 16
Source: adapted from the CSDN article by "CloudCver" (CC 4.0 BY-SA); original post: https://blog.csdn.net/CLOUD_J/article/details/98882718
