IOL Power Calculation with Artificial Intelligence Algorithms (in R)
1. Preparing the Data
2. Building the Models
2.1 Method 1
2.2 Method 2
1. Preparing the Data
Prepare the data file Data.xlsx; a sample is shown below:
Age | AL | ACD | K1 | K2 | WTW | LT | Ref | IOLPower |
68.00 | 22.68 | 2.21 | 42.44 | 42.75 | 11.20 | 4.82 | -1.13 | 26.50 |
62.00 | 23.79 | 3.42 | 43.93 | 45.51 | 11.70 | 4.49 | -0.11 | 19.50 |
62.00 | 23.82 | 3.12 | 44.08 | 44.53 | 12.00 | 4.90 | -0.86 | 21.00 |
68.00 | 22.56 | 2.30 | 42.65 | 43.13 | 11.00 | 4.65 | -0.37 | 25.50 |
73.00 | 23.50 | 2.38 | 43.43 | 43.84 | 11.40 | 4.85 | -0.26 | 21.50 |
74.00 | 23.09 | 2.91 | 43.86 | 44.74 | 11.90 | 4.45 | -0.53 | 23.00 |
73.00 | 24.61 | 3.57 | 41.70 | 42.23 | 11.90 | 3.49 | -0.80 | 21.00 |
64.00 | 22.47 | 2.89 | 46.63 | 48.01 | 11.10 | 4.41 | -0.52 | 21.00 |
63.00 | 21.35 | 2.67 | 43.89 | 44.53 | 12.20 | 4.68 | -0.58 | 29.50 |
2. Building the Models
2.1 Method 1
Model the relationship between Age, AL, ACD, K1, K2, WTW, LT, Ref and IOLPower directly.
Several machine-learning models are trained, and the three best performers are then stacked into an ensemble.
First, a look at the correlations among the predictor variables:
The predictors are clearly correlated to some extent; for example, LT (lens thickness) is positively correlated with Age, and ACD with AL.
rm(list=ls())
# load libraries
library(caret)
library(corrplot)
library(tidyverse)
library(xlsx)
# Split out validation dataset
set.seed(123)
df <- read.xlsx("./Data.xlsx",sheetName ="Sheet1",stringsAsFactors = FALSE)
str(df)
names(df) <- c("Age","AL","ACD","K1","K2","WTW","LT","Ref","IOLPower")  # order must match the spreadsheet columns shown above
cor(df[c("Age","AL","ACD","K1","K2","WTW","LT")])
pairs(df[c("Age","AL","ACD","K1","K2","WTW","LT")])
# more informative scatterplot matrix
library(psych)
pairs.panels(df[c("Age","AL","ACD","K1","K2","WTW","LT")])
train.idx <- sample(1:nrow(df), 0.7 * nrow(df))
train.df <- as_tibble(df[train.idx,])
test.df <- as_tibble(df[-train.idx,])
train_x = train.df %>% select(-'IOLPower')
train_y = train.df[,'IOLPower']
test_x = test.df %>% select(-'IOLPower')
test_y = test.df[,'IOLPower']
# Run algorithms using 10-fold cross validation
control <- trainControl(method="repeatedcv", number=10, repeats=3, search="random")
metric <- "RMSE"
# lm
set.seed(123)
fit_lm <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data=train.df, method="lm",
metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_lm
# SVM
set.seed(123)
fit_svm <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.df,
method="svmRadial", metric=metric, preProc=c("center", "scale"), tuneLength=15,
trControl=control)
fit_svm
# ANNs
set.seed(123)
fit_nnet <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.df, method="nnet",metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_nnet
if (FALSE) {
  pred_Power <- predict(fit_nnet, test_x)
  print(pred_Power)
  err <- test_y$IOLPower - pred_Power
  print(err)
  plot(err)
}
# random forest
set.seed(123)
fit_rf <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.df, method="rf",
metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_rf
# CART
set.seed(123)
fit_cart <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.df, method="rpart",
metric=metric,preProc=c("center", "scale"),tuneLength=15, trControl=control)
fit_cart
# kNN
set.seed(123)
fit_knn <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.df, method="knn",
metric=metric, preProc=c("center", "scale"),tuneLength=15, trControl=control)
fit_knn
#MARS
set.seed(123)
fit_MARS<- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.df,
method="earth", metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_MARS
# GBM
set.seed(123)
fit_gbm <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.df, method =
"gbm",metric=metric,preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_gbm
#cubist
set.seed(123)
fit_cubist <- train(IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.df, method = "cubist",
metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_cubist
# Compare algorithms
results <- resamples(list(LM = fit_lm, CART = fit_cart, KNN= fit_knn, RF = fit_rf, SVR = fit_svm,
GBM = fit_gbm, MARS= fit_MARS, Cubist = fit_cubist,ANNs=fit_nnet))
summary(results)
dotplot(results)
# Stacking
library(caretEnsemble)
set.seed(123)
allModels <- caretList(
  IOLPower ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.df,
  trControl = trainControl("repeatedcv", number = 10, repeats = 3,
                           search = "random", verboseIter = TRUE),
  metric = "RMSE",
  tuneList = list(
    cubist    = caretModelSpec(method = "cubist",    tuneLength = 15, preProc = c("center", "scale")),
    svmRadial = caretModelSpec(method = "svmRadial", tuneLength = 15, preProc = c("center", "scale")),
    Earth     = caretModelSpec(method = "earth",     tuneLength = 15, preProc = c("center", "scale"))
  )
)
bwplot(resamples(allModels), metric = "RMSE")
summary(allModels)
# Stack with lm
set.seed(123)
stack <- caretStack(allModels, metric = "RMSE", method = "lm", tuneLength = 10,trControl =
trainControl("repeatedcv", number = 5, repeats = 3, verboseIter = T))
summary(stack)
print(stack)
if (TRUE) {
  print(ggplot(varImp(fit_lm)))  # inside a braced block, ggplot objects must be printed explicitly
  pred_Power <- predict(stack, test_x)
  print(pred_Power)
  plot(pred_Power)
  pred_err <- test_y$IOLPower - pred_Power
  print(pred_err)
  print(summary(pred_err))
  plot(pred_err)
}
The prediction errors from this run are quite small.
2.2 Method 2
Combine the optical formula with machine learning (this approach comes from a paper out of Zhongshan).
The optical IOL power formula is:
Power = 1336 / (AL - ELP) - 1336 / (1336 / (1000 / (1000 / Ref - 12) + K) - ELP)
The hardest part in clinical practice is predicting the ELP (Effective Lens Position; personally I think "equivalent lens position" might be a better translation). In the code below, Ratio is defined as
Ratio = (ELP - ACD) / LT
Each model below is trained to predict this Ratio; from the predicted Ratio we recover ELP and substitute it into the optical formula above to obtain the final IOL power.
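As a quick sanity check of this pipeline, the round trip from a predicted Ratio to ELP to IOL power can be sketched as follows. The biometry values here are made up for illustration and are not taken from the dataset; the two helper functions mirror the formulas defined in this section.

```r
# Vergence formula: IOL power from ELP, axial length AL, mean keratometry K,
# and target refraction Ref (12 mm vertex distance assumed, as above)
PowerCalc <- function(ELP, AL, K, Ref) {
  1336 / (AL - ELP) - 1336 / (1336 / (1000 / (1000 / Ref - 12) + K) - ELP)
}
# ELP recovered from the predicted Ratio: ELP = Ratio * LT + ACD
ELPCalc <- function(Ratio, LT, ACD) Ratio * LT + ACD

# Hypothetical example: Ratio = 0.45, LT = 4.5 mm, ACD = 3.0 mm
ELP <- ELPCalc(Ratio = 0.45, LT = 4.5, ACD = 3.0)  # 0.45 * 4.5 + 3.0 = 5.025 mm
PowerCalc(ELP, AL = 23.5, K = 43.5, Ref = -0.5)    # roughly 21 D
```

With these plausible inputs the formula lands in the usual clinical range (around 21 D), which is a cheap way to confirm the implementation before fitting any model.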
if (TRUE) {
  rm(list = ls())
  library(xlsx)
  data <- read.xlsx("./Data.xlsx", sheetName = "Sheet1", stringsAsFactors = FALSE)
  # optical formula minus the implanted power; its root in ELP is the ELP
  # that exactly reproduces the implanted power
  f <- function(ELP, AL, K, Ref, P) {
    1336 / (AL - ELP) - (1336 / (1336 / (1000 / (1000 / Ref - 12) + K) - ELP)) - P
  }
  root <- c()
  for (i in 1:nrow(data)) {
    root[i] <- uniroot(f, c(0, 20), AL = data$AL[i],
                       K = (data$K1[i] + data$K2[i]) / 2,
                       Ref = data$Ref[i], P = data$IOLPower[i])$root
  }
  print(root)
  data$ELP <- root
  Ratio <- (data$ELP - data$ACD) / data$LT
  print(Ratio)
  data$Ratio <- Ratio
  PowerCalc <- function(ELP, AL, K, Ref) {
    1336 / (AL - ELP) - (1336 / (1336 / (1000 / (1000 / Ref - 12) + K) - ELP))
  }
  P <- PowerCalc(data$ELP, data$AL, (data$K1 + data$K2) / 2, data$Ref)
  print(P)
  options(scipen = 100)  # do not use scientific notation
  err <- data$IOLPower - P
  print(summary(err))
  plot(err)
}
library(caret)
library(corrplot)
library(tidyverse)
library(xlsx)
# Split out validation dataset
set.seed(123)
cor(data[c("Age","AL","ACD","K1","K2","WTW","LT")])
pairs(data[c("Age","AL","ACD","K1","K2","WTW","LT")])
# more informative scatterplot matrix
library(psych)
pairs.panels(data[c("Age","AL","ACD","K1","K2","WTW","LT")])
train.idx <- sample(1:nrow(data), 0.7 * nrow(data))
train.data <- as_tibble(data[train.idx,])
test.data <- as_tibble(data[-train.idx,])
train_x = train.data %>% select(-'Ratio')
train_y = train.data[,'Ratio']
test_x = test.data %>% select(-'Ratio')
test_y = test.data[,'Ratio']
# Run algorithms using 10-fold cross validation
control <- trainControl(method="repeatedcv", number=10, repeats=3, search="random")
metric <- "RMSE"
# lm
set.seed(123)
fit_lm <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data=train.data, method="lm",metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_lm
# SVM
set.seed(123)
fit_svm <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.data,method="svmRadial", metric=metric, preProc=c("center", "scale"), tuneLength=15,trControl=control)
fit_svm
plot(fit_svm)
# ANNs
set.seed(123)
fit_nnet <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.data, method="nnet",metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_nnet
if (TRUE) {
  pred <- predict(fit_nnet, test_x)
  print(pred)
  err <- test_y$Ratio - pred
  print(err)
  plot(err)
}
# random forest
set.seed(123)
fit_rf <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.data, method="rf",metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_rf
# CART
set.seed(123)
fit_cart <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.data, method="rpart",metric=metric,preProc=c("center", "scale"),tuneLength=15, trControl=control)
fit_cart
# kNN
set.seed(123)
fit_knn <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.data, method="knn",metric=metric, preProc=c("center", "scale"),tuneLength=15, trControl=control)
fit_knn
#MARS
set.seed(123)
fit_MARS<- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref,data=train.data,method="earth", metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_MARS
# GBM
set.seed(123)
fit_gbm <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.data, method ="gbm",metric=metric,preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_gbm
#cubist
set.seed(123)
fit_cubist <- train(Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.data, method = "cubist",
metric=metric, preProc=c("center", "scale"), tuneLength=15, trControl=control)
fit_cubist
# Compare algorithms
results <- resamples(list(LM = fit_lm, CART = fit_cart, KNN= fit_knn, RF = fit_rf, SVR = fit_svm,GBM = fit_gbm, MARS= fit_MARS, Cubist = fit_cubist,ANNs=fit_nnet))
summary(results)
dotplot(results)
# Stacking
library(caretEnsemble)
set.seed(123)
allModels <- caretList(
  Ratio ~ Age+AL+ACD+K1+K2+WTW+LT+Ref, data = train.data,
  trControl = trainControl("repeatedcv", number = 10, repeats = 3,
                           search = "random", verboseIter = TRUE),
  metric = "RMSE",
  tuneList = list(
    cubist    = caretModelSpec(method = "cubist",    tuneLength = 15, preProc = c("center", "scale")),
    svmRadial = caretModelSpec(method = "svmRadial", tuneLength = 15, preProc = c("center", "scale")),
    Earth     = caretModelSpec(method = "earth",     tuneLength = 15, preProc = c("center", "scale"))
  )
)
bwplot(resamples(allModels), metric = "RMSE")
summary(allModels)
# Stack with lm
set.seed(123)
stack <- caretStack(allModels, metric = "RMSE", method = "lm", tuneLength = 10,trControl =trainControl("repeatedcv", number = 5, repeats = 3, verboseIter = T))
summary(stack)
print(stack)
if (TRUE) {
  print(ggplot(varImp(fit_nnet)))  # inside a braced block, ggplot objects must be printed explicitly
  pred_Ratio <- predict(stack, test_x)
  print(pred_Ratio)
  plot(pred_Ratio)
  pred_err1 <- test_y$Ratio - pred_Ratio
  print(pred_err1)
  print(summary(pred_err1))
  plot(pred_err1)
  ELPCalc <- function(Ratio, LT, ACD) {
    Ratio * LT + ACD
  }
  ELP <- ELPCalc(pred_Ratio, test_x$LT, test_x$ACD)
  Pred_Power <- PowerCalc(ELP, test_x$AL, (test_x$K1 + test_x$K2) / 2, test_x$Ref)
  Pred_err2 <- test_x$IOLPower - Pred_Power
  print(summary(Pred_err2))
  plot(Pred_err2)
}
Reproducing the paper was just about successful, but the sample size is too small to say yet which of the two methods is better.