python实现logistic_使用python实现logistic二分类

这段时间做了一个二分类的任务，训练数据共8000条样本，每条样本包含1000个特征和一个label。下面记录一下使用python实现logistic二分类的代码。

import os

import time

import numpy as np

# 获取路径

def get_directory(file_name):
    """Return ``file_name`` joined onto the current working directory.

    Args:
        file_name: File name (or relative path) to resolve.

    Returns:
        The path built by ``os.path.join(os.getcwd(), file_name)``.
    """
    return os.path.join(os.getcwd(), file_name)

# 读取速度相对较慢

def get_train_data(directory):
    """Load a comma-separated numeric file with ``np.loadtxt``.

    The shape of the loaded array is printed, and the array is returned so
    the caller can actually use the data (previously it was discarded).

    Args:
        directory: Path of the comma-delimited text file to read.

    Returns:
        The array produced by ``np.loadtxt``.
    """
    data = np.loadtxt(directory, delimiter=',')
    print(data.shape)
    return data

# 读取速度相对更快

def loadDataSet(file_name, label_existed_flag):
    """Parse a comma-separated numeric file into feature and label arrays.

    Every non-blank line is split on ``,`` and each field is converted with
    ``float``. Blank lines (for example a trailing empty line) are skipped
    instead of crashing on ``float('')``.

    Args:
        file_name: Path of the text file to read.
        label_existed_flag: When equal to ``1`` the last field of each line
            is taken as the label and the remaining fields as features;
            otherwise every field is a feature and no labels are collected.

    Returns:
        A ``(feats, labels)`` tuple of NumPy arrays. ``labels`` is an empty
        array when ``label_existed_flag`` is not ``1``.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    feats = []
    labels = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                continue
            values = [float(field) for field in stripped.split(',')]
            if label_existed_flag == 1:
                feats.append(values[:-1])
                labels.append(values[-1])
            else:
                feats.append(values)
    return np.array(feats), np.array(labels)

# 读取test的label

def loadLabels(file_name):
    """Read one label per line from a text file.

    Only the first comma-separated field of each non-blank line is used and
    converted with ``float``. Blank lines are skipped instead of crashing on
    ``float('')``.

    Args:
        file_name: Path of the label file to read.

    Returns:
        A 1-D NumPy array with one label per non-blank line.

    Raises:
        ValueError: If the first field of a line cannot be converted to
            ``float``.
    """
    labels = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                continue
            labels.append(float(stripped.split(',')[0]))
    return np.array(labels)

class logistic():
    """Binary logistic-regression classifier trained with full-batch updates.

    The model keeps one weight per feature plus a trailing bias weight.
    Samples are expected as rows of a 2-D array and labels as 0/1 values.
    """

    def __init__(self, train_data, train_label, test_data, test_label, train_num, learning_rate):
        """Store the data sets and hyper-parameters and initialise weights.

        Args:
            train_data: Training samples, one row per sample.
            train_label: Labels matching ``train_data`` row by row.
            test_data: Test samples, one row per sample.
            test_label: Labels matching ``test_data`` row by row.
            train_num: Number of update iterations run by ``train``.
            learning_rate: Step size applied to every weight update.
        """
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label
        self.train_num = train_num
        self.learning_rate = learning_rate
        # One weight per feature plus one for the bias column. The builtin
        # ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
        self.weight = np.ones(len(train_data[0]) + 1, dtype=float)

    def add_bias(self, data):
        """Return ``data`` with a column of ones appended as the last column."""
        return np.column_stack((data, np.ones(len(data))))

    def sigmoid(self, x):
        """Return the logistic function of ``x`` element-wise.

        Uses the identity ``sigmoid(x) = 0.5 * (1 + tanh(x / 2))`` so that
        large negative inputs do not overflow inside ``np.exp``.
        """
        return 0.5 * (1.0 + np.tanh(0.5 * np.asarray(x, dtype=float)))

    def compute(self, data):
        """Return the predicted probabilities for bias-augmented ``data``."""
        return self.sigmoid(np.dot(data, self.weight))

    def error(self, predict, label):
        """Return the sum of squared differences between ``predict`` and ``label``."""
        return np.power(predict - label, 2).sum()

    def update(self, data, diff):
        """Move the weights along ``diff . data`` averaged over the samples.

        Args:
            data: Bias-augmented samples, one row per sample.
            diff: ``label - prediction`` for every sample.
        """
        self.weight += self.learning_rate * np.dot(diff, data) / len(data)

    def train(self):
        """Run ``train_num`` full-batch updates, printing the error each step."""
        data = self.add_bias(self.train_data)
        for _ in range(self.train_num):
            predict = self.compute(data)
            # The error is measured before the update of the same iteration.
            error = self.error(predict, self.train_label)
            self.update(data, self.train_label - predict)
            print(error)

    def calculate_predict(self, my_data):
        """Return hard 0/1 predictions (as floats) for raw samples.

        A sample is assigned 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.
        """
        probabilities = self.compute(self.add_bias(my_data))
        return (probabilities > 0.5).astype(float)

    @staticmethod
    def _match_ratio(predict, label):
        """Return the fraction of entries of ``predict`` that equal ``label``."""
        predict = np.asarray(predict)
        label = np.asarray(label)
        if len(label) < len(predict):
            raise IndexError("label has fewer entries than predict")
        # Only the first len(predict) labels are compared.
        matches = int(np.count_nonzero(predict == label[:len(predict)]))
        return matches / len(predict)

    def accuracy(self, predict):
        """Return the fraction of ``predict`` that matches the training labels."""
        return self._match_ratio(predict, self.train_label)

    def test(self):
        """Return the accuracy of the current weights on the test set."""
        return self._match_ratio(self.calculate_predict(self.test_data), self.test_label)

if __name__ == "__main__":
    # perf_counter is a monotonic clock, suited to measuring elapsed time.
    time1 = time.perf_counter()

    # Input file names, resolved against the current working directory.
    train_file_name = "train_data.txt"
    test_data_name = "test_data.txt"
    test_label_name = "answer.txt"

    train_directory = get_directory(train_file_name)
    test_data_directory = get_directory(test_data_name)
    test_label_directory = get_directory(test_label_name)

    # Load the data. The test file carries no label column, so the empty
    # label array returned for it is discarded; the real test labels come
    # from the separate answer file.
    train_feats, train_labels = loadDataSet(train_directory, 1)
    test_feats, _ = loadDataSet(test_data_directory, 0)
    real_test_label = loadLabels(test_label_directory)

    # Hyper-parameters: number of training iterations and learning rate.
    train_num = 10000
    learning_rate = 0.05

    # Train the classifier and report accuracy on both data sets.
    my_logistic = logistic(train_feats, train_labels, test_feats, real_test_label, train_num, learning_rate)
    my_logistic.train()

    my_predict = my_logistic.calculate_predict(train_feats)
    my_accuracy = my_logistic.accuracy(my_predict)
    print("train accuracy")
    print(my_accuracy)

    test_accuracy = my_logistic.test()
    print("test accuracy")
    print(test_accuracy)

    # Total wall-clock time of the run, in seconds.
    time2 = time.perf_counter()
    elapse = time2 - time1
    print(elapse)

原文链接:https://blog.csdn.net/bofu_sun/article/details/105370473

python实现logistic_使用python实现logistic二分类相关推荐

【机器学习基础】用Python画出几种常见机器学习二分类损失函数
在二分类的监督学习中,支持向量机.逻辑斯谛回归与最大熵模型.提升方法各自使用合页损失函数.逻辑斯谛损失函数.指数损失函数,分别写为: 这 3 种损失函数都是 0-1 损失函数的上界,具有相似的形状.( ...
python实现logistic_用Python实现机器学习算法—Logistic 回归算法
在 Logistic 回归中,我们试图对给定输入特征的线性组合进行建模,来得到其二元变量的输出结果.例如,我们可以尝试使用竞选候选人花费的金钱和时间信息来预测选举的结果(胜或负).Logistic 回 ...
python实现logistic_用python实现Logistic
print("花费的时间为:", time.time() - start_time) plot_pr(0.6, precision, recall, "pos" ...
python实现GBDT算法的回归、二分类以及多分类，算法流程解读并可视化
向AI转型的程序员都关注了这个号
【小白学习PyTorch教程】七、基于乳腺癌数据集构建Logistic 二分类模型
「@Author:Runsen」在逻辑回归中预测的目标变量不是连续的,而是离散的.可以应用逻辑回归的一个示例是电子邮件分类:标识为垃圾邮件或非垃圾邮件.图片分类.文字分类都属于这一类. 在这篇博客中 ...
python机器学习库xgboost——xgboost算法（有分类和回归实例）
分享一个朋友的人工智能教程.零基础!通俗易懂!风趣幽默!还带黄段子!大家可以看看是否对自己有帮助:点击打开 docker/kubernetes入门视频教程全栈工程师开发手册 (作者:栾鹏) pyth ...
交叉熵损失函数分类_逻辑回归(Logistic Regression)二分类原理，交叉熵损失函数及python numpy实现...
本文目录: 1. sigmoid function (logistic function) 2. 逻辑回归二分类模型 3. 神经网络做二分类问题 4. python实现神经网络做二分类问题 ----- ...
机器学习（五）logistic回归进行二分类以及多分类（Python代码）
文章目录一.相关概念 1.logistic回归 1.1前言 1.2目的 1.3流程 1.4Sigmoid函数 1.4.1公式 1.4.2图像 1.5优缺点 2.最优化方法 2.1梯度上升算法 2.1 ...
logistic回归如何_第七章：利用Python实现Logistic回归分类模型
免责声明:本文是通过网络收集并结合自身学习等途径合法获取,仅作为学习交流使用,其版权归出版社或者原创作者所有,并不对涉及的版权问题负责.若原创作者或者出版社认为侵权,请联系及时联系,我将立即删除文章, ...

python实现logistic_使用python实现logistic二分类

python实现logistic_使用python实现logistic二分类相关推荐

最新文章

热门文章