tensorflow基于csv数据集实现多元线性回归并预测

#coding:utf8
import tensorflow as tf
from sklearn import linear_model
from sklearn import preprocessing
import numpy as npdef read_data(file_queue):'''the function is to get features and label (即样本特征和样本的标签）数据来源是csv的文件，采用tensorflow 自带的对csv文件的处理方式:param file_queue::return: features,label'''# 读取的时候需要跳过第一行reader = tf.TextLineReader(skip_header_lines=1)key, value = reader.read(file_queue)# 对于数据源中空的值设置默认值record_defaults = [[''], [''], [''], [''], [0.], [0.], [0.], [0.], [''],[0], [''], [0.], [''], [''], [0]]# 定义decoder，每次读取的执行都从文件中读取一行。然后，decode_csv 操作将结果解析为张量列表province, city, address, postCode, longitude,latitude, price, buildingTypeId, buildingTypeName, tradeTypeId, tradeTypeName, expectedDealPrice, listingDate, delislingDate, daysOnMarket = tf.decode_csv(value, record_defaults)#对非数值数据进行编码：buildingTypeNamepreprocess_buildingTypeName_op = tf.case({tf.equal(buildingTypeName, tf.constant('Residential')): lambda: tf.constant(0.00),tf.equal(buildingTypeName, tf.constant('Condo')): lambda: tf.constant(1.00),tf.equal(buildingTypeName, tf.constant('Mobile Home')): lambda: tf.constant(2.00),tf.equal(buildingTypeName, tf.constant('No Building')): lambda: tf.constant(3.00),tf.equal(buildingTypeName, tf.constant('Row / Townhouse')): lambda: tf.constant(4.00),tf.equal(buildingTypeName, tf.constant('Duplex')): lambda: tf.constant(5.00),tf.equal(buildingTypeName, tf.constant('Manufactured Home')): lambda: tf.constant(6.00),tf.equal(buildingTypeName, tf.constant('Commercial')): lambda: tf.constant(7.00),tf.equal(buildingTypeName, tf.constant('Other')): lambda: tf.constant(8.00),}, lambda: tf.constant(-1.00), exclusive=True)# 对tradeTypeName 进行编码 Sale，Leasepreprocess_tradeTypeName_op = tf.case({tf.equal(tradeTypeName, tf.constant('Sale')): lambda: tf.constant(0.00),tf.equal(tradeTypeName, tf.constant('Lease')): lambda: tf.constant(1.00),}, lambda: tf.constant(-1.00), exclusive=True)features = tf.stack([latitude,longitude,price, preprocess_buildingTypeName_op, preprocess_tradeTypeName_op,expectedDealPrice])return features, daysOnMarketdef 
create_pipeline(filename,batch_size,num_epochs=None):'''the function is to get every batch example and label此处使用的是tf.train.batch，即顺序获取，非随机获取，随机获取采用的方法是：tf.train.shuffle_batch:param filename::param batch_size::param num_epochs::return:example_batch,label_batch'''file_queue = tf.train.string_input_producer([filename],num_epochs=num_epochs)# example,label 样本和样本标签,batch_size 返回一个样本batch样本集的样本个数example,dayOnMarket = read_data(file_queue)# 出队后队列至少剩下的数据个数，小于capacity（队列的长度）否则会报错，min_after_dequeue = 1000#队列的长度capacity = min_after_dequeue+batch_size# 顺序获取每一批数据example_batch,daysOnMarket_batch= tf.train.batch([example,dayOnMarket],batch_size=batch_size,capacity=capacity)#顺序读取return example_batch,daysOnMarket_batchdef train(batch_size, feature_num,learn_rate,filename):'''the function is to train to get w and b:param batch_size: 批量大小:param feature_num: 特征个数:param learn_rate: 学习率:param filename:csv文件名称:return: w,b'''# 预处理输入的样本和标签，后面用获取的数据进行喂养x_data = tf.placeholder(tf.float32, [batch_size, feature_num])y_data = tf.placeholder(tf.float32, [batch_size])# 创建参数 w ，bw = tf.Variable(tf.random_uniform((feature_num, 1), -1.0, 1.0))b = tf.Variable(tf.random_uniform((1, 1), -1.0, 1.0))# 定义预测的yy = tf.add(tf.matmul(x_data, w), b)#定义损失函数loss = tf.reduce_mean(tf.square(y - y_data)) / 2#定义优化器，这里采用梯度下降的方法optimizer = tf.train.GradientDescentOptimizer(learn_rate)# 训练train = optimizer.minimize(loss)# 获取 样本和标签example_batch, daysOnMarket_batch = create_pipeline(filename, batch_size)# 初始化全局和局部变量init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())print('.........................>>>>开始会话')# 创建会话，采用上下文管理器的方式，无需手动关闭会话with tf.Session() as sess:sess.run(init_op)# 创建一个队列coord = tf.train.Coordinator()threads = tf.train.start_queue_runners(coord=coord)for step in range(100):#获取正真的样本和标签example, label = sess.run([example_batch, daysOnMarket_batch])print('第%d批数据'%(step))print(example, label)print('.......这一批数据的直接参数')reg = linear_model.LinearRegression()reg.fit(example, 
label)print("Coefficients of sklearn: W=%s, b=%f" % (reg.coef_, reg.intercept_))# 数据归一化处理scaler = preprocessing.StandardScaler().fit(example)print(scaler.mean_, scaler.scale_)x_data_standard = scaler.transform(example)sess.run(train, feed_dict={x_data: x_data_standard, y_data: label})# 每十步获取一次w和bif step % 10 == 0:print('当前w值和b值')print(sess.run(w, feed_dict={x_data: x_data_standard, y_data: label}),sess.run(b, feed_dict={x_data: x_data_standard, y_data: label}))print('。。。。。。。》》》训练后得到w和b')theta = sess.run(w).flatten()intercept = sess.run(b).flatten()print('W:%s' % theta)print('b:%f' % intercept)coord.request_stop()coord.join(threads)return theta, interceptdef predict(data, theta,intercept, feature_num):'''the function is to predict label(daysOnMarket):param data: 待预测数据:param theta: 训练得到的参数:param intercept: 截距:param feature_num: 特征个数（自变量个数）:return: result（label：预测结果）'''theta1 = tf.placeholder(tf.float32, [feature_num, 1])intercept1 = tf.placeholder(tf.float32, [1, 1])x_data = tf.placeholder(tf.float32, [1, feature_num])y = tf.add(tf.matmul(x_data, theta1), intercept1)init = tf.global_variables_initializer()with tf.Session() as sess:sess.run(init)result = sess.run(y, feed_dict={x_data: data, theta1: theta, intercept1: intercept})print(result)return resultdef data_type_conversion(data,theta,intercept,feature_num):'''the function is to do data_type_conversion(数据类型和形状转换）:param data::param theta::param intercept::return:'''real_data1 = data.astype(np.float32)real_data2 = data.reshape(1,feature_num)theta_tra = theta.astype(np.float32)theta_real = theta.reshape(feature_num, 1)intercept_tran = intercept.astype(np.float32)intercept_real = intercept.reshape(1, 1)return real_data2,theta_real,intercept_realif __name__ == '__main__':input_longitude =int(input('请输入经度'))input_latitude = int(input('请输入纬度'))input_price = int(input('请输入价格'))input_buildingtype = input('请输入房源类型名称：只有9种类型：Residential:0 ，Condo:1 Mobile Home:3,No Building:4 , Row / Townhouse:5 ，Duplex:6 ，Manufactured Home:7 
，Commercial:8 ，Other:9')input_tradetype = input('请输入交易形式：只有两种Sale:0，Lease:1')input_expected_deal_price = int(input('请输入期望的交易价格'))data = np.array([input_longitude,input_latitude,input_price,input_buildingtype,input_tradetype,input_expected_deal_price])theta, intercept = train(10, 6, 0.3, 'house_info.csv')data_real, theta_real, intercept_real = data_type_conversion(data, theta, intercept,6)daysOnmarket = predict(data_real, theta_real, intercept_real, 6)print('预测的天数:%d'%int(daysOnmarket))

转载于:https://www.cnblogs.com/bluesl/p/9215749.html

tensorflow基于csv数据集实现多元线性回归并预测相关推荐

TensorFlow基于cifar10数据集实现进阶的卷积网络
TensorFlow基于cifar10数据集实现进阶的卷积网络学习链接 CIFAR10模型及数据集介绍综述 CIFAR10数据集介绍 CIFAR10数据集可视化 CIFAR10模型 CIFAR10 ...
机器学习算法（五）：基于企鹅数据集的决策树分类预测
机器学习算法(五):基于企鹅数据集的决策树分类预测 1 决策树的介绍和应用 1.1 决策树的介绍 决策树是一种常见的分类模型,在金融风控、医疗辅助诊断等诸多行业具有较为广泛的应用。决策树的核心思想是 ...
机器学习Tensorflow基于MNIST数据集识别自己的手写数字（读取和测试自己的模型）
机器学习Tensorflow基于MNIST数据集识别自己的手写数字(读取和测试自己的模型)
TensorFlow基于minist数据集实现手写字识别实战的三个模型
手写字识别 model1:输入层→全连接→输出层softmax model2:输入层→全连接→隐含层→全连接→输出层softmax model3:输入层→卷积层1→卷积层2→全连接→dropout层→ ...
基于不平衡数据集的中风分析预测
摘要:近些年来随着社会人口老龄化及城镇化步伐进一步加快,城市居民不太健康的生活形式盛行,心脑血管病症的凶险要素明显增多,我国中风的患病率具有明显增长.然而中风的诱使因素多,临床诊断复杂,且尚未有有 ...
Tensorflow基于minist数据集实现自编码器
Tensorflow实现自编码器自编码器 Denoising AutoEncoder(去噪自编码器) 自编码器特征的稀疏表达:使用少量的基本特征组合拼装得到更高层抽象的特征. 如:图像碎片可由少量 ...
A.机器学习入门算法（五）：基于企鹅数据集的决策树分类预测
[机器学习入门与实践]入门必看系列,含数据挖掘项目实战:数据融合、特征优化、特征降维、探索性分析等,实战带你掌握机器学习数据挖掘。专栏详细介绍:[机器学习入门与实践]合集入门必看系列,含数据挖掘项目实 ...
TF之GD：基于tensorflow框架搭建GD算法利用Fashion-MNIST数据集实现多分类预测(92%)
TF之GD:基于tensorflow框架搭建GD算法利用Fashion-MNIST数据集实现多分类预测(92%) 目录输出结果实现代码输出结果 Successfully downloaded t ...
基于Keras的LSTM多变量时间序列预测（北京PM2.5数据集pollution.csv）
基于Keras的LSTM多变量时间序列预测传统的线性模型难以解决多变量或多输入问题,而神经网络如LSTM则擅长于处理多个变量的问题,该特性使 ...

tensorflow基于csv数据集实现多元线性回归并预测

tensorflow基于csv数据集实现多元线性回归并预测相关推荐

最新文章

热门文章