【人工智能项目】深度学习实现白葡萄酒品质预测

任务介绍

评价一款葡萄酒时不外乎从颜色、酸度、甜度、香气、风味等入手,而决定这些的就是葡萄酒的挥发酸度、糖分、密度等。

根据给出的白葡萄酒酸度、糖分、pH值、柠檬酸等数据,判断葡萄酒品质。

导入数据

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
# Load the training CSV; header=0 takes column names from the first row and
# index_col=None keeps the default integer index.
train_data = pd.read_csv("./winequality_dataset/train.csv",header=0,index_col=None)
# Preview the first rows (shown as cell output when run in a notebook).
train_data.head()

EDA

# Print a summary of the frame (columns, dtypes, non-null counts).
train_data.info()

# Count missing values per column.
train_data.isnull().sum()

import matplotlib.pyplot as plt
%matplotlib inlinefig = plt.figure()
ax = fig.add_subplot(111)
ax.set(xlabel="total sulfur dioxide",ylabel="free sulfur dioxide")
ax.scatter(train_data["total sulfur dioxide"],train_data["free sulfur dioxide"],c="r")
plt.show()

数据集划分

from sklearn.model_selection import train_test_split

# Features are every column except the last; the target is the `quality`
# column flattened to a 1-D array.
X = train_data.iloc[:, :-1]
y = np.ravel(train_data.quality)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2019
)
X.head()

y
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(5517, 11)
(5517,)
(1380, 11)
(1380,)

数据预处理

from sklearn.preprocessing import PolynomialFeatures

print("Shape of X_train before transformation:", X_train.shape)

# Degree-2 polynomial expansion without the constant (bias) column.
# The transformer is fitted on the training split only and then reused for
# the test split and the full feature matrix.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
X_poly = poly.transform(X)
print("Shape of X_train after transformation:", X_train_poly.shape)
Shape of X_train before transformation: (3200, 11)
Shape of X_train after transformation: (3200, 77)
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the expanded training features only, then apply
# the same scaling to the train split, the test split and the full matrix.
# Note: X_train, X_test and X are overwritten with the scaled polynomial features.
scaler = MinMaxScaler().fit(X_train_poly)
X_train = scaler.transform(X_train_poly)
X_test = scaler.transform(X_test_poly)
X = scaler.transform(X_poly)

数据归一化处理

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Fit a min-max scaler on the training features and rescale all three
# feature matrices in place.
# NOTE(review): if X_train/X_test/X were already min-max scaled by an earlier
# cell, this second pass looks redundant - confirm whether it is intended.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)

ML模型

# 传统机器方法大杂烩
from sklearn.preprocessing import StandardScaler,RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_scorefrom sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysisfrom catboost import CatBoostClassifierfrom sklearn.metrics import accuracy_score,confusion_matrix,classification_report
def get_models():models = []models.append(("LR",LogisticRegression()))models.append(("NB",GaussianNB()))models.append(("KNN",KNeighborsClassifier()))models.append(("DT",DecisionTreeClassifier()))models.append(("SVM rbf",SVC()))models.append(("SVM linear",SVC(kernel="linear")))models.append(("LDA",LinearDiscriminantAnalysis()))models.append(("Cat",CatBoostClassifier(silent=True)))return modelsdef cross_validation_scores_for_various_ml_models(X_cv,y_cv):print("cross validation accuracy".upper())models = get_models()results = []names = []for name,model in models:kfold = KFold(n_splits=5,shuffle=True,random_state=2019)cv_result = cross_val_score(model,X_cv,y_cv,cv=kfold,scoring="accuracy")names.append(name)results.append(cv_result)print("{}cross validation,accuracy:{:0.2f}".format(name,cv_result.mean()))
cross_validation_scores_for_various_ml_models(X,y)


Random Forest

from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.metrics import mean_absolute_error,classification_report
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from xgboost import XGBRegressor
# Sweep the forest size from 10 to 800 in steps of 10 and record the test-set
# mean absolute error for each size.
scores = {}
for n_estimators in range(10, 810, 10):
    RF_model = RandomForestClassifier(n_estimators=n_estimators, random_state=2019)
    RF_model.fit(X_train, y_train)
    RF_predictions = RF_model.predict(X_test)
    RF_mae = mean_absolute_error(RF_predictions, y_test)
    scores[n_estimators] = RF_mae
import matplotlib.pyplot as plt
%matplotlib inlinefig_RF,ax_RF = plt.subplots(figsize=(10,4))
ax_RF.set_title("Mean Absolute Error with Number of Estimators of a Random Forest")
ax_RF.set_xlabel("Number of Estimators")
ax_RF.set_ylabel("Mean Absolute Error")
plt.plot(list(scores.keys()),list(scores.values()))
best_n_estimators = 0for n_estimators,score in scores.items():if score == min(scores.values()):best_n_estimators = n_estimatorsprint(f"Best Number of Estimators:{n_estimators}")
# Refit a random forest with the selected number of estimators and report
# its test-set error and per-class metrics.
RF_model = RandomForestClassifier(n_estimators=best_n_estimators, random_state=2019)
RF_model.fit(X_train, y_train)
RF_predictions = RF_model.predict(X_test)
RF_mae = mean_absolute_error(RF_predictions, y_test)
print(f"Mean Absolute Error:{RF_mae}")
print(classification_report(y_test, RF_predictions))
from sklearn.model_selection import GridSearchCV

# 3-fold grid search over the forest size, scored by negative mean absolute error.
param_grid = {"n_estimators": [120, 140, 300, 500, 800, 1200]}
RF_model_new = RandomForestClassifier(random_state=2019)
RF_grid = GridSearchCV(
    RF_model_new,
    param_grid,
    verbose=1,
    n_jobs=-1,
    cv=3,
    scoring="neg_mean_absolute_error",
)
RF_grid.fit(X_train, y_train)

# Show the parameters selected by the grid search.
RF_grid.best_params_
# NOTE(review): n_estimators is hard-coded to 140 rather than read from
# RF_grid.best_params_ - confirm it matches the search result.
RF_model = RandomForestClassifier(n_estimators=140, random_state=2019)
RF_model.fit(X_train, y_train)
RF_predictions = RF_model.predict(X_test)
RF_mae = mean_absolute_error(RF_predictions, y_test)
print(f"Mean Absolute Error:{RF_mae}")
print(classification_report(y_test, RF_predictions))


ExtraTreeClassifier

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import ExtraTreesRegressor
# Sweep the ensemble size from 10 to 590 in steps of 10 and record the
# test-set mean absolute error for each size.
scores = {}
for n_estimators in range(10, 600, 10):
    extra_model = ExtraTreesClassifier(n_estimators=n_estimators, random_state=2019)
    extra_model.fit(X_train, y_train)
    extra_predictions = extra_model.predict(X_test)
    extra_mae = mean_absolute_error(extra_predictions, y_test)
    scores[n_estimators] = extra_mae
import matplotlib.pyplot as plt
%matplotlib inlinefig_RF,ax_RF = plt.subplots(figsize=(10,4))
ax_RF.set_title("Mean Absolute Error with Number of Estimators of a Random Forest")
ax_RF.set_xlabel("Number of Estimators")
ax_RF.set_ylabel("Mean Absolute Error")
plt.plot(list(scores.keys()),list(scores.values()))

best_n_estimators = 0for n_estimators,score in scores.items():if score == min(scores.values()):best_n_estimators = n_estimatorsprint(f"Best Number of Estimators:{n_estimators}")

Best Number of Estimators:150

# Train an extra-trees classifier with a fixed ensemble size and report the
# mean absolute error of its predictions on the test split.
extra_classifier = ExtraTreesClassifier(
    n_estimators=530,
    random_state=2019,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself
extra_classifier_prediction = extra_classifier.predict(X_test)
extra_mae = mean_absolute_error(extra_classifier_prediction, y_test)
print(f"Mean Absolute Error:{extra_mae}")

Mean Absolute Error:0.16304347826086957

Ensemble
K-Nearest Neighbors

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import scipy.stats as st
from sklearn.neighbors import KNeighborsClassifier

# Randomized 3-fold search: n_neighbors is sampled from the integers 1..39
# (st.randint excludes the upper bound), weights from the two listed options.
param_grid = {
    "n_neighbors": st.randint(1, 40),
    "weights": ["uniform", "distance"],
}
KN_model = KNeighborsClassifier()
KN_grid = RandomizedSearchCV(KN_model, param_grid, verbose=1, n_jobs=-1, cv=3)
KN_grid.fit(X_train, y_train)

print(KN_grid.best_params_)

{'n_neighbors': 32, 'weights': 'distance'}

# Fit k-nearest neighbours with distance weighting and report the test-set error.
# NOTE(review): n_neighbors is hard-coded to 30 rather than read from
# KN_grid.best_params_ - confirm this is intended.
KN_model = KNeighborsClassifier(n_neighbors=30, weights="distance")
KN_model.fit(X_train, y_train)
KN_predictions = KN_model.predict(X_test)
KN_mae = mean_absolute_error(KN_predictions, y_test)
print(f"Mean Absolute Error:{KN_mae}")

Mean Absolute Error:0.5441176470588235

Logistic Regression

from sklearn.linear_model import LogisticRegression

# Logistic regression with default settings, scored by test-set mean absolute error.
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)
logistic_prediction = logistic_regression.predict(X_test)
logistic_mae = mean_absolute_error(logistic_prediction, y_test)
print(f"Mean Absolute Error:{logistic_mae}")

Mean Absolute Error:0.5970588235294118

LinearRegression

from sklearn.linear_model import LinearRegression

# Ordinary least-squares regression on the quality score, scored by test-set
# mean absolute error.
lin_regression = LinearRegression()
lin_regression.fit(X_train, y_train)
lin_prediction = lin_regression.predict(X_test)
lin_mae = mean_absolute_error(lin_prediction, y_test)
print(f"Mean Absolute Error:{lin_mae}")

Mean Absolute Error:0.6353137915187634

ElasticNet

from sklearn.linear_model import ElasticNet

# Elastic-net regression with default settings, scored by test-set mean
# absolute error.
ela_regression = ElasticNet()
ela_regression.fit(X_train, y_train)
els_prediction = ela_regression.predict(X_test)
ela_mae = mean_absolute_error(els_prediction, y_test)
print(f"Mean Absolute Error:{ela_mae}")

Mean Absolute Error:0.698948525093073

PolynomialFeatures

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Degree-5 polynomial expansion followed by ordinary least squares.
# This rebinds X_train_poly and X_test_poly.
# NOTE(review): if X_train already holds expanded polynomial features from an
# earlier cell, a degree-5 expansion on top of it becomes extremely wide -
# confirm which feature matrix is meant here.
poly_features = PolynomialFeatures(degree=5, include_bias=False)
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
poly_reg_prediction = poly_reg.predict(X_test_poly)
poly_reg_mae = mean_absolute_error(poly_reg_prediction, y_test)
print(f"Mean Absolute Error:{poly_reg_mae}")

Mean Absolute Error:19.971436771294478

DL模型

# Model definition
from keras.models import Sequential
from keras.layers import Dense,Dropout

model = Sequential()
# Wider leading layers, left disabled by the author:
# model.add(Dense(128,activation="relu"))
# model.add(Dropout(0.2))
# model.add(Dense(64,activation="relu"))
# model.add(Dropout(0.2))
model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(16, activation="relu"))
model.add(Dropout(0.2))
# Single output unit with no activation: quality is predicted as one number.
model.add(Dense(1))

from keras.callbacks import EarlyStopping,ReduceLROnPlateau,ModelCheckpoint,LearningRateScheduler

# Save the weights with the lowest validation loss to dl.h5.
checkpoint = ModelCheckpoint(
    "dl.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
# Stop after 5 epochs without validation-loss improvement and restore the best weights.
earlystop = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=5,
    verbose=1,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.2 after 3 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    verbose=1,
    # min_delta = 0.00001
)
# In the collapsed source this assignment had been swallowed by the trailing
# comment, leaving `callbacks` undefined for model.fit below.
callbacks = [earlystop, checkpoint, reduce_lr]

# Compile the model
model.compile(loss="mae", optimizer="adam", metrics=["mae"])

# Train the model; the held-out test split doubles as the validation set.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=1,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=callbacks,
)

【人工智能项目】深度学习实现白葡萄酒品质预测相关推荐

  1. 热门 | Google Brain前员工深度盘点2017人工智能和深度学习各大动态

    翻译 | AI科技大本营 参与 | shawn 编辑 | Donna 2017年是人工智能井喷的一年.Google Brain团队前成员Denny Britz在自己的博客WILDML上对过去一年人工智 ...

  2. H2O机器学习:一种强大的可扩展的人工智能和深度学习技术

    书名:基于H2O的机器学习实用方法:一种强大的可扩展的人工智能和深度学习技术 原书名:Practical Machine Learning with H2O:Powerful, Scalable Te ...

  3. 深度学习决策支持 时空预测_重工业的预测性维护和决策支持系统

    深度学习决策支持 时空预测 Digital transformation is one of the top priorities for industrial companies. The larg ...

  4. 【人工智能】深度学习、数据库选择和人工智能的革命;人工智能是解锁IoT潜力的钥匙

    深度学习(DL)和人工智能(AI)已经不再是科幻小说中遥不可及的目标,目前已成为了互联网和大数据等领域的前沿研究内容. 由于云计算提供强的计算能力.提出的先进算法以及充裕的资金,这创造了五年前难以想象 ...

  5. 人工智能和深度学习发展趋势_AI在学习和发展中的作用

    人工智能和深度学习发展趋势 In this series of blogs, AI in HR, we already understood what is AI, what is HR and be ...

  6. 深度学习(一): 人工智能-机器学习-深度学习的区别

    人工智能-机器学习-深度学习 他们之间是有区别的 先来一张图做一下解释 从发展历史上来看 AI:让机器展现出人类智力 回到1956年夏天,在当时的会议上,AI先驱的梦想是建造一台复杂的机器(让当时刚出 ...

  7. 2018 年,关于深度学习的 10 个预测

    我有一种预感:2018年,所有的事情都会发生巨变.我们在2017年看到的深度学习取得的惊人突破将会以一种强大的方式延续到2018年.2017年在深度学习领域的研究成果将会应用于日常的软件应用中. 下面 ...

  8. 图解人工智能机器学习深度学习的关系和区别

    图解人工智能机器学习深度学习的关系和区别,先直观看下图的关系: AI(Artificial Intelligence.人工智能).机器学习(machine learning).深度学习(Deep le ...

  9. AlphaGo、人工智能、深度学习解读以及应用

    经过几天的比拼,AlphaGo最终还是胜出,创造了人机大战历史上的一个新的里程碑.几乎所有的人都在谈论这件事情,这使得把"人工智能"."深度学习"的热潮推向了新 ...

最新文章

  1. 基于DPI(深度报文解析)的应用识别
  2. 表单提交时有的字段可以传递到后台有的不可以
  3. LeetCode 26 删除有序数组中的重复项
  4. 修改thymeleaf默认路径
  5. webflux 对url参数的接收处理
  6. vue替换全部符合’字符串_技术成长日记-Vim实用技巧-4.7查找替换
  7. 步骤一:入门linux基础/01Linux简介和安装/002Linux发行版的介绍
  8. android返回按钮实现,Android实现返回键操作思路
  9. matlab判断传递函数的稳定性,基于Matlab的控制系统稳定性判定.pdf
  10. 小米8对一加6打开软件速度测试,小米 8 对决一加 6,谁更值得买?
  11. 路科sv练习2-类的继承
  12. 配置 Raspberry PI WiFi
  13. 计算机毕业设计之 少儿编程学习平台的设计与实现
  14. Learning to Localize Sound Sources in Visual Scenes: Analysis and Applications
  15. 学习笔记-基于全局和局部对比自监督学习的高分辨率遥感图像语义分割-day1
  16. XSSFWorkbook下载excel表格
  17. fixture ‘xxx‘ not found
  18. 陆奇演讲:2021不能错过的四大趋势
  19. 一段式、两段式和三段式状态机
  20. 推荐2本普通人参悟的书

热门文章

  1. 20135203齐岳 信息安全系统设计基础第四周学习总结
  2. 宾得rtk手簿说明书_那曲宾得RTK操作说明
  3. style.left和offsetLeft 用法
  4. 南卫理公会大学计算机科学,南卫理公会大学计算机科学与工程硕士.pdf
  5. Lync 客户端单独安装激活步骤
  6. Java容器类 Collection (set list queue)和map
  7. yum -- Failed connect to mirrors.aliyuncs.com:80; No route to host
  8. 如何看待人生与技术的价值
  9. Excel导入SqlServer2012提示“消息7314”
  10. 全系列毕业设计论文来了