该方法不需要依赖预测模型。

缺点:计算复杂度太高。运行起来让你崩溃。

'''A Graph-Based Approach for Active Learning in Regression'''
"""
This AL method does not depend on a regression model.
"""
from copy import deepcopy
from pathlib import Path
from time import time

import numpy as np
import pandas as pd
import xlrd
import xlwt
from scipy.spatial.distance import cdist
from skactiveml.pool import CostEmbeddingAL
from skactiveml.utils import MISSING_LABEL
from sklearn.metrics.pairwise import pairwise_distances, pairwise_distances_argmin, pairwise_distances_argmin_min
from sklearn.preprocessing import StandardScaler
from collections import OrderedDictclass GALR():def __init__(self, X, y, labeled, budget, X_test, y_test):self.X = Xself.y = yself.nSample, self.nDim = X.shapeself.labels = sorted(np.unique(self.y))self.nClass = len(self.labels)self.X_test = X_testself.y_test = y_testself.labeled = list(deepcopy(labeled))self.n_theta = [i for i in range(self.nClass - 1)]self.theta = Noneself.unlabeled = self.initialization()self.budget = deepcopy(budget)self.budgetLeft = deepcopy(budget)def initialization(self):unlabeled = [i for i in range(self.nSample)]for idx in self.labeled:unlabeled.remove(idx)return unlabeleddef select(self):while self.budgetLeft > 0:Q = OrderedDict()for idx in self.unlabeled:dist_before = pairwise_distances_argmin_min(self.X[self.unlabeled],self.X[self.labeled],metric="l1")[1]tmp_unlabeled = deepcopy(self.unlabeled)tmp_unlabeled.remove(idx)tmp_labeled = deepcopy(self.labeled)tmp_labeled.append(idx)dist_after = pairwise_distances_argmin_min(self.X[tmp_unlabeled],self.X[tmp_labeled],metric="l1")[1]Q[idx] = np.sum(dist_before)-np.sum(dist_after)tar_idx = max(Q, key=Q.get)self.budgetLeft -= 1self.unlabeled.remove(tar_idx)self.labeled.append(tar_idx)if __name__ == '__main__':names_list = ["toy"]for name in names_list:print("########################{}".format(name))data_path = Path(r"D:\OCdata")partition_path = Path(r"E:\FFFFF\DataPartitions")"""--------------read the whole data--------------------"""read_data_path = data_path.joinpath(name + ".csv")data = np.array(pd.read_csv(read_data_path, header=None))X = np.asarray(data[:, :-1], np.float64)scaler = StandardScaler()X = scaler.fit_transform(X)y = data[:, -1]y -= y.min()nClass = len(np.unique(y))Budget = 10 * nClass# Budget = 150"""--------read the partitions--------"""read_partition_path = str(partition_path.joinpath(name + ".xls"))book_partition = xlrd.open_workbook(read_partition_path)"""-----read the kmeans results according to the partition-----"""workbook = xlwt.Workbook()count = 0for SN in 
book_partition.sheet_names():S_Time = time()train_idx = []test_idx = []labeled = []table_partition = book_partition.sheet_by_name(SN)for idx in table_partition.col_values(0):if isinstance(idx,float):train_idx.append(int(idx))for idx in table_partition.col_values(1):if isinstance(idx,float):test_idx.append(int(idx))for idx in table_partition.col_values(2):if isinstance(idx,float):labeled.append(int(idx))X_train = X[train_idx]y_train = y[train_idx].astype(np.int32)X_test = X[test_idx]y_test = y[test_idx]model = GALR(X=X_train, y=y_train, labeled=labeled, budget=Budget, X_test=X_test, y_test=y_test)model.select()# SheetNames = "{}".format(count)sheet = workbook.add_sheet(SN)for i, idx in enumerate(train_idx):sheet.write(i, 0,  int(idx))for i, idx in enumerate(test_idx):sheet.write(i, 1, int(idx))for i, idx in enumerate(labeled):sheet.write(i, 2, int(idx))for i, idx in enumerate(model.labeled):sheet.write(i, 3, int(idx))print("SN:",SN," Time:",time()-S_Time)

忘记之前写过了,又写了一遍!

"""
ALCS
"""
import os
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import pairwise_distances
from collections import OrderedDict
from itertools import combinations, product
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score


class GB(object):
    """Greedy L1-coverage sampling over a precomputed distance matrix.

    Starting from one random seed, each round labels the candidate that
    minimizes the summed distance from every remaining unlabeled point to its
    nearest labeled point (the same coverage criterion as GALR, but with all
    pairwise distances precomputed).

    Parameters
    ----------
    X : feature matrix, used only to build the pairwise L1 distance matrix.
    y : labels, stored but unused by `select`.
    budget : number of points to select.
    """

    def __init__(self, X, y, budget):
        self.X = X
        self.y = y
        self.nSample = len(y)
        self.budgetLeft = budget
        # Full pairwise L1 (Manhattan) distance matrix, computed once.
        self.distMatrix = pairwise_distances(X, metric="l1")
        self.labeled = []
        self.unlabeled = list(range(self.nSample))

    def select(self):
        """Pick `budget` points greedily; fills self.labeled in pick order."""
        # Seed with one uniformly random point (uses numpy's global RNG).
        tar_idx = np.random.choice(self.unlabeled, size=1, replace=False)[0]
        self.labeled.append(tar_idx)
        self.budgetLeft -= 1
        self.unlabeled.remove(tar_idx)
        while self.budgetLeft > 0:
            print("剩余预算:", self.budgetLeft)
            unlabeled = list(self.unlabeled)
            # Nearest-labeled distance of each point w.r.t. the CURRENT
            # labeled set.  Hoisted out of the candidate loop: the original
            # recomputed this min over the whole labeled set for every
            # candidate, an accidental O(|U|^2 * |L|) per round.
            base_min = {idx: min(self.distMatrix[idx, jdx] for jdx in self.labeled)
                        for idx in unlabeled}
            sum_theta = OrderedDict()
            for udx in unlabeled:
                # Total coverage cost if udx were added to the labeled set:
                # each other point's nearest labeled neighbor is either its
                # current one or udx itself.
                total = 0.0
                for idx in unlabeled:
                    if idx == udx:
                        continue
                    total += min(base_min[idx], self.distMatrix[idx, udx])
                sum_theta[udx] = total
            tar_idx = min(sum_theta, key=sum_theta.get)  # first minimum on ties
            self.labeled.append(tar_idx)
            self.unlabeled.remove(tar_idx)
            self.budgetLeft -= 1


if __name__ == '__main__':
    # Other datasets tried during development (swap the path to use one):
    # R15, Jain, D31, Ecoli, ThreeBlobs, car, SWD, newthyroid, Knowledge,
    # ESL, balance-scale, winequality-red/white (poor), cleveland (unusable),
    # automobile, thyroid (unusable), glass, toy, dermatology.
    data = np.array(pd.read_csv(r'D:\ExperimentalData\Aggregation\aggregation.csv', header=None))
    X = data[:, :-1]
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = data[:, -1]
    nClass = len(np.unique(y))
    print(" nClass ==", nClass)
    print(np.unique(y, return_counts=True))
    budget = 20 * nClass
    # budget -= 77
    model = GB(X=X, y=y, budget=budget)
    model.select()
    print(model.labeled)
    # Sanity check: no index selected twice.
    print(len(model.labeled), "==", len(set(model.labeled)))
    print("剩余预算:", model.budgetLeft)
    labeled = model.labeled
    tmp = []
    # Report how many distinct classes are covered after each pick.
    for i, idx in enumerate(labeled):
        tmp.append(idx)
        print(i + 1, " ", len(set(y[tmp])))

复现:A Graph-Based Approach for Active Learning in Regression相关推荐

  1. 《A sparse annotation strategy based on attention-guided active learning for 3D medical image segmentation》--阅读笔记-Arxiv

    之前读过,但是没做笔记,就直接拉的其它作者的笔记了.感谢 https://blog.csdn.net/sinat_35779431/article/details/99682540 文章链接:http ...

  2. 主动学习active learning方法汇总

    更新2021/12/6 到目前为止看了不少主动学习的文献,简单做一下一些目前为止了解到的主动学习方法的整理吧. 起初是精读的文献中整理的,后来发现在精读文献的方法比较里也有一些比较经典的主动学习方法, ...

  3. 主动学习(Active Learning)领域部分经典论文汇总

    只简单找了一小部分 CVPR [link] Active Image Segmentation Propagation CVPR 16 segmentation [link] The Power of ...

  4. 【Active Learning - 13】总结与展望 参考文献的整理与分享(The End...)

    写在前面: 本篇博文将作为"主动学习系列"博文的结尾.目前,本人在职的相关工作暂无与主动学习相关的需求.因此,之后大概率是不会再更新相关的博文了.能够分享的内容和资料,我都分享在这 ...

  5. 【论文汇总】人工智能顶会深度主动学习(Deep Active Learning)相关论文

    汇总2017年至今,ICCV\CVPR\NIPS\ECCV会议上发表的深度主动学习(Deep Active Learning)相关文章,根据原文中report的实验数据集划分为 图像分类 . 语义分割 ...

  6. Boosting Active Learning via Improving Test Performance

    文章来着第三十六届AAAI人工智能会议(AAAI-22) 概述: 主动学习(AL)的核心是应该选择哪些数据进行注释.现有的工作试图选择高度不确定或信息丰富的数据进行注释.然而,尚不清楚所选数据如何影响 ...

  7. Item Tagging for Information Retrieval: A Tripartite Graph Neural Network based Approach 用于信息检索的项目标签

    文章目录 摘要 简介 Tagging 方法 动机和总览 动机 总览 TagGNN-IT 节点表示 TagGNN-IT Propagation 损失 2.3 TagGNN-QI 2.3.1 边表示 2. ...

  8. A novel framework for detecting social bots with deep neural networks and active learning(SCI一区)

    目录 摘要 1 绪论 1.1. Social bots in OSNs 1.2. Challenges 1.3. Contribution and organization 2 相关工作 2.1. G ...

  9. 深度主动学习综述(Deep Active Learning)

    原文 Abstract 主动学习试图通过标记最少量的样本使得模型的性能收益最大化.而深度学习则对数据比较贪婪,需要大量的数据供给来优化海量的参数,从而使得模型学会如何提取高质量的特征.近年来,由于互联 ...

最新文章

  1. python显示文件夹图片_如何显示文件夹中的随机图片(Python)
  2. linux octave源码安装,在Linux操作系统上安装Octave的方法
  3. 【Hive】表生成(Table-Generating)函数
  4. 利用泰勒展开求高斯分布表
  5. 网络货运平台申请后的优劣势分析
  6. IIS管理器无法打开。启动后,在任务栏中有,但是窗口不见了
  7. Android 如何测试你的Base64是否正确展示
  8. 重启打印机(打印机任务无法取消时)
  9. 数据仓库架构以及多维数据模型的设计
  10. 卷积神经网络:一个模块化视角
  11. 关系型数据库中一对多,多对一,多对多关系(详细)
  12. XML和Schema命名空间详解---实例篇
  13. 计算机邀请函制作教案,计算机邀请函制作要点
  14. CVPR2020/UDA/图像翻译-Cross-domain Correspondence Learning for Exemplar-based Image Translation基于范例的跨域对应
  15. dedecms站点采集标签
  16. 为什么 1 字节表示的数值范围是 127 ~ -128
  17. 第三空间与第四空间的幻想(序言篇)
  18. 软件安全之代码注入技术 向目标 PE 文件注入 DLL notepad lpk.dll 远程线程函数 提权函数 OpenProcess VirtualAllocEx
  19. 让一个div水平并垂直显示的五种方法
  20. 现货白银继续高位震荡整理 空头是否销声敛迹

热门文章

  1. appium连接模拟器
  2. Mac下快捷键的符号所对应的按键
  3. 0040 基于文本界面的房屋出租系统
  4. 什么是web service ?
  5. 用英语描述计算机操作,描述计算机RAM ROM的英语单词
  6. mysql实现宠物主人登陆的数据访问_使用DAO模式开发宠物管理系统
  7. 【713. 乘积小于 K 的子数组】
  8. FFMPEG与RTMP
  9. 阿里云ET工业大脑获“可信云”工业智能云奖
  10. UEFI原理与编程(四)(dec dsc inf文件)