ProTICS包的介绍（根据生信技能树Jimmy老师分享的乳腺癌分子分型包资料整理，感谢Jimmy老师！）

1、设置环境
2、Part1的结果
3、Part2的结果
4、Part3的结果
5、相关函数

亮点：尽管对选定组织学亚型中肿瘤浸润淋巴细胞的预后相关性进行了大量研究，但很少有研究系统地报道了免疫细胞在分子亚型中的预后影响，如机器学习方法对多组学数据集的量化。本文描述了一种新的计算框架ProTICS，以量化肿瘤微环境中免疫细胞比例的差异，并估计它们在不同亚型中的预后效应。
期刊: Briefings in Bioinformatics
论文：ProTICS reveals prognostic impact of tumor infiltrating immune cells in different molecular subtypes
Github link: https://github.com/liu-shuhui/ProTICS

ProTICS是由三部分组成的，三部分各有目的。后面部分的执行取决于前面部分的结果。

1、设置环境

将GitHub上的包文件下载下来

#请安装下面的包
library(data.table)
library(dplyr)
library(rTensor)
library(nnTensor)
library(survival)
library(survminer)
library(edgeR)
library(limma)
library(Glimma)
library(gplots)
library(org.Mm.eg.db)
library(grDevices)
library(pheatmap)
library(forestplot)

2、Part1的结果

# 通过运行NTD方法发现分子亚型。这个例子中，患者被分为两种癌症亚型。
# 可视化两种癌症亚型的总体生存分析#输入数据
data1<-fread(file = "./Data/data1.txt",header = T)  ##读取基因表达数据
data2<-fread(file = "./Data/data2.txt",header = T)  ##读取DNA甲基化数据
clinicdata<-fread(file ="./Data/clinic_Data.txt",header = T)
colnames(clinicdata)<-c("patient_id", "death", "survival")source("./R/functions/normalization.R")
source("./R/functions/NTD_subtyping.R")
## k=2 是一个示例
Subtype= NTD_subtyping(data1,data2,k=2, n=100)survivaldata<-cbind(clinicdata,Subtype)
write.table(survivaldata, file = "overallsurvival_subtypes.txt",sep = "\t", col.names = T, quote = F, row.names = F)
survdiff(Surv(survival,death)~Subtype, data=survivaldata)
survival_out<-survfit(Surv(survival,death)~Subtype, data=survivaldata)
ggsurvplot(survival_out, data = survivaldata, risk.table = T,xlab="Survival time/day", ylab="Survival rate")

3、Part2的结果

# 两种癌症亚型之间特征基因的差异表达（DE）分析，可视化所选DE基因的热图
sig_expr <- fread("./Data/signature_count.txt",sep = "\t",header = TRUE) #行是特征基因
survival_data <- fread("overallsurvival_subtypes.txt", sep = "\t",header = TRUE)
subtypes<-survival_data$SubtypeID<- which(subtypes==1 | subtypes==2)
Surv<-survival_data[ID,]
seqd<-dplyr::select(sig_expr,c(colnames(sig_expr)[1],Surv$patient_id))   #select用dplyr::select
source("./R/functions/subtypes_DEA.R")
GS<-subtypes_DEA(Surv,seqd)# 差异表达基因的热图
sig_expr<-sig_expr[is.element(sig_expr$symbol,GS),]
IDD<-c(which(subtypes==1),which(subtypes==2))
survd_new<-survival_data[IDD,]
sigdata<-dplyr::select(sig_expr,c(colnames(sig_expr)[1],survd_new$patient_id))  #dplyr::selectanno_c<-data.frame(Types = factor(survd_new$Subtype,c("1","2"),c("Sub1","Sub2")))
colnames(anno_c)<-c("  ")
row.names(anno_c)<-survd_new$patient_idsource("./R/functions/normalization.R")
data<-normalization(log2(sigdata[,-1]+1))rownames(data)<-sigdata$symbol
pheatmap(data,cluster_rows=T,color = colorRampPalette(c( "#0077FF","#FFEEFF","#FF7700"))(1000),cluster_cols=F,show_rownames = TRUE,show_colnames=F,annotation=anno_c,annotation_legend=TRUE,main="dataset")

4、Part3的结果

#1、10种免疫细胞在不同分子亚型中的比例分布
#2.1使用单因素cox回归分析subtypes1型中单免疫细胞预后
#2.2使用多变量cox回归分析subtypes1中10种免疫细胞类型的预后survdata <- fread("./output/overallsurvival_subtypes.txt", sep = "\t",header = TRUE)
cell<-fread(file = "./Data/CellProportion.txt", sep = "\t",header = T)# 删掉不是免疫细胞类型的[16:18]列。
cell<-cell[,-c(16:18)]id=which(apply(cell[,-1],2,var)>1e-05)+1  # 去除方差非常小的列。
cell_new<-dplyr::select(cell,c(colnames(cell)[c(1,id)]))  #dplyr::select# 免疫细胞类型的列名
covariates<-c("`CD4 Naive`","`CD4 Memory`","`CD8 Memory`","`CD8 Effector`", "`Th cell`", "`Monocytes CD16`","`Monocytes CD14`","DC","pDC","Plasma")## 1. 绘制10种免疫细胞在不同分子亚型中的比例分布
`Cell types` = c(rep(covariates, each=length(which(survdata$Subtype==1))),rep(covariates, each=length(which(survdata$Subtype==2))))
`Patient type` = c(rep(c("Subtyp1"),each=length(which(survdata$Subtype==1))*10),rep(c("Subtyp2"),each=length(which(survdata$Subtype==2))*10))ID1<-sapply(survdata$patient_id[which(survdata$Subtype==1)],function(x) which(cell_new$Mixture==x))
ID2<-sapply(survdata$patient_id[which(survdata$Subtype==2)],function(x) which(cell_new$Mixture==x))`Relative proportions of the 10 immune cell types` <-c(as.vector(as.matrix(cell_new[ID1,-1])),as.vector(as.matrix(cell_new[ID2,-1])))data<-data.frame(`Cell types`,`Patient type`,`Relative proportions of the 10 immune cell types`)data$Cell.type <- factor(data$Cell.type,levels=covariates,ordered = TRUE)
ggplot(data, aes(`Cell types`, y=`Relative proportions of the 10 immune cell types`, color=`Patient type`)) +theme(panel.background = element_rect(linetype = 1, colour = "white", size = 1,fill = "lightblue"),axis.text.x = element_text(angle = 20, hjust = 0.6,vjust = 0.75),plot.title = element_text(colour = "black",face = "bold",size = 12, vjust = 1),plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "inches"))+stat_boxplot(geom ='errorbar', width = 0.8) +geom_boxplot(width = 0.8)
facet_grid(.~Cell.type, scales = "free_x")

## 2. 免疫细胞类型预后关联的森林图surv_sub<-survdata[which(survdata$Subtype==1),]
surv_sub$survival<-scale(surv_sub$survival,center = FALSE, scale = TRUE)ID<-sapply(surv_sub$patient_id, function(x) which(cell_new$Mixture==x))
cell_new<-cell_new[ID,-1]
#cell_new<-logcell<-log2(cell_new+1)
cutoff<-as.matrix(apply(cell_new,2,median))tem<-t(replicate(dim(cell_new)[1],cutoff[,1]))
mat_bip<-as.matrix(cell_new>tem)
mat_bip[mat_bip==TRUE]<-1data1<-cbind(surv_sub,mat_bip)# 2.1单变量 cox 回归
source("./R/functions/uni_cox.R")
result<-uni_cox(covariates,data1)
res1<-result[[1]]
res2<-result[[2]]
# 森林图
forestplot(res1, mean = res2$HR, lower = res2$lower, upper = res2$upper,graph.pos = 2,graphwidth = unit(18,"mm"),hrzl_lines = list("2" = gpar(lty=2,columns=1:4)),is.summary = c(TRUE,rep(FALSE,10)),txt_gp = fpTxtGp(ticks = gpar(cex=0.8),summary = gpar(cex=0.8),cex = 0.8),boxsize = 0.2,line.margin = unit(6,"mm"),lineheight = unit(6,"mm"),col=fpColors(box="blue",line="blue",summary="blue"),clip = c(0,5),xticks = c(0, 0.5, 1, 2,3,4,5),lwd.ci=2, ci.vertices=TRUE, ci.vertices.height = 0.12,colgap = unit(2,"mm"),zero = 1,title = "Subtype 1")

# 2.2多变量cox回归
source("./R/functions/multi_cox.R")
result<-multi_cox(covariates,data1)
res1<-result[[1]]
res2<-result[[2]]
# 森林图
forestplot(res1, mean = res2$HR, lower = res2$lower, upper = res2$upper,graph.pos = 2,graphwidth = unit(18,"mm"),hrzl_lines = list("2" = gpar(lty=2,columns=1:4)),is.summary = c(TRUE,rep(FALSE,10)),txt_gp = fpTxtGp(ticks = gpar(cex=0.8),summary = gpar(cex=0.8),cex = 0.8),boxsize = 0.2,line.margin = unit(6,"mm"),lineheight = unit(6,"mm"),col=fpColors(box="blue",line="blue",summary="blue"),clip = c(0,5),xticks = c(0, 0.5, 1, 2,3,4,5),lwd.ci=2, ci.vertices=TRUE, ci.vertices.height = 0.12,colgap = unit(2,"mm"),zero = 1,title = "Subtype 1")

5、相关函数

# 1、NTD_subtyping  NTD分型
# 该函数用于整合多组学，执行非负Tucker分解算法，然后通过matrice_B将患者分配到不同的组。NTD_subtyping <- function(data1,data2,k,n){## 定义一个三模张量arr <- array(0,dim = c(dim(data1[,-1]),2)) # 行：基因；列：病人（样本）arrT <- as.tensor(arr)arrT[,,1] <- unlist(normalization(data1[,-1]))arrT[,,2] <- unlist(normalization(data2[,-1]))##k：亚型的数量；n：交互步数（默认值：100）output <- NTD(arrT, rank=c(k, k, k),num.iter=n)  ## matrice_B保存了患者的潜在因素信息matrice_B<-t(output$A[[2]])## 亚型信息group<-max.col(matrice_B)   return(group)
}# 2、multi_cox 多因素cox
multi_cox<-function(covariates,data){res.cox <- coxph(Surv(survival, death) ~ `CD4 Naive` + `CD4 Memory` + `CD8 Memory`+`CD8 Effector`+`Th cell`+`Monocytes CD16`+`Monocytes CD14`+DC+pDC+Plasma, data =  data1)#summary(res.cox)multi_res <- summary(res.cox)res1 <- cbind(colnames(cell_new),multi_res[["coefficients"]][,c(2,5)])res2<-multi_res[["conf.int"]][,-2]HR <-round(res2[,1], digits=2);#exp(beta)HR.confint.lower <- round(res2[,2], 2)HR.confint.upper <- round(res2[,3],2)res1[,2] <- paste0(HR, " [",HR.confint.lower, "-", HR.confint.upper, "]")res1[,3]<-format(as.numeric(res1[,3]), scientific = TRUE, digits = 2)res1 <- rbind(c("Immune cells","HR 95% CI","P.value"),res1)res2<-data.table(rbind(c(NA,NA,NA),res2))colnames(res2)<-c("HR", "lower","upper")result<-list(res1,res2)return(result)
}# 3、normalization归一化
# 这是将数据映射到（0,1）的归一化函数。
# 可以在http://r-pkgs.had.co.nz/了解有关使用RStudio编写软件包的更多信息。normalization<-function(x) {min_v  <- min(x)max_v <- max(x)A<-x-replicate(dim(x)[2],min_v)B<-replicate(dim(x)[2],(max_v-min_v))return(A/B)
}# 4、subtypes_DEA差异表达分析
subtypes_DEA <- function(Surv,seqd){## 定义一个三模张量group<-factor(Surv$Subtype,c("1","2"),c("Subtype_1","Subtype_2"))design<-model.matrix(~0+group)colnames(design)<-c("Subtype_1","Subtype_2")#y <- cpm(seqd[,-1],log = TRUE)y <- voom(seqd[,-1], design, plot = F)fit <- lmFit(y, design)contr <- makeContrasts(Subtype_1-Subtype_2, levels = design)tmp <- contrasts.fit(fit, contr)tmp <- eBayes(tmp)res <- topTable(tmp, sort.by = "P", n = Inf)rownames(res)<-seqd$symbol[as.numeric(rownames(res))]T<-res[which(abs(res$logFC)>=1 & (res$adj.P.Val < 1e-2)),]if (dim(T)[1]<=20){GS<-rownames(T)} else {T<-cbind(rownames(T),T)colnames(T)[1]<-c("Genes")res<-arrange(T,desc(abs(T$logFC)))GS<-as.character(res[1:20,1])}return(GS)
}# 5、单因素cox
uni_cox<-function(covariates,data){univ_formulas <- sapply(covariates,function(x) as.formula(paste('Surv(survival,death)~', x)))univ_models <- lapply( univ_formulas, function(x){coxph(x, data = data1)})univ_results <- lapply(univ_models,function(x){x <- summary(x)p.value<-format(x$wald["pvalue"], scientific = TRUE,digits = 3)#wald.test<-signif(x$wald["test"], digits=2)#beta<-signif(x$coef[1], digits=2);#coeficient betaHR <-round(x$coef[2], digits=2);#exp(beta)HR.confint.lower <- round(x$conf.int[,"lower .95"], 2)HR.confint.upper <- round(x$conf.int[,"upper .95"],2)HR1 <- paste0(HR, " [",HR.confint.lower, "-", HR.confint.upper, "]")res.cox<-c(HR, HR.confint.lower,HR.confint.upper,HR1,p.value)names(res.cox)<-c("HR", "lower","upper","HR [95% CI for HR]","p.value")return(res.cox)#return(exp(cbind(coef(x),confint(x))))})univ_res <- t(as.data.frame(univ_results, check.names = F))res1<-rbind(c("Immune cells","HR 95% CI","P.value"),cbind(colnames(cell_new),format(univ_res[,c(4,5)],scientific = TRUE,digits = 3)))res2<-data.table(rbind(c(NA,NA,NA),univ_res[,c(1,2,3)]))result<-list(res1,res2)return(result)
}

ProTICS包的介绍（根据生信技能树Jimmy老师分享的乳腺癌分子分型包资料整理）相关推荐

CancerSubtypes包的介绍（根据生信技能树Jimmy老师分享的乳腺癌分子分型包资料整理）
CancerSubtypes包的介绍(根据生信技能树Jimmy老师分享的乳腺癌分子分型包资料整理,感谢Jimmy老师!) 1. 引言 2. 数据处理 2.1 基本处理 2.1.1 通过检查数据分布来分 ...
生信技能树电脑配置linux,生信技能树--Jimmy - Linux 20题
一.在任意文件夹下面创建形如 1/2/3/4/5/6/7/8/9 格式的文件夹系列. 二.在创建好的文件夹下面,比如我的是 /Users/jimmy/tmp/1/2/3/4/5/6/7/8/9 ,里面 ...
R：生信技能树学习笔记一
生信技能树小破站:R应该这样学1-4 1.查看已经安装的包的地址 .libPaths() 2.怎么查看函数用法 #在RStudio的右下角窗口的help可以看到 ?函数名 3.三个有用的函数 1.he ...
R：生信技能树学习笔记二
生信技能树小破站:R应该这样学5-7 1.热图 rm(list=ls()) library(pheatmap) a1=rnorm(100) dim(a1)=c(5,20) #设置维度 pheatmap ...
生信技能树【代码大全搜录】
生信技能术代码大全: rm(list = ls()) options()$repos options()$BioC_mirror #options(BioC_mirror="https:// ...
python表情包多样化聊天室_Python | 信不信我分分钟批量做你大堆的表情包？
作者 | 依然很拉风作为一个数据分析师,应该信奉一句话----"一图胜千言".不过这里要说的并不是数据可视化,而是一款全民向的产品形态----表情包!!!! 表情包不仅仅是一种符 ...
生信技能树linux虚拟机,科学网—Windows10安装Linux子系统Ubuntu 20.04LTS，轻松使用生信软件，效率秒杀虚拟机 - 刘永鑫的博文...
很多优秀的生物信息学软件,如QIIME.QIIME 2.LEfSe等没有Windows版,而使用VirutalBox虚拟机不仅效率低,而且挂载外部硬盘和使用中也经常遇到各种问题,配置和使用详见 - 扩 ...
生信技能树课程记录笔记（七）20220531
一.数据框排序法一:sort函数默认升序. 例:sort(test$Sepal.Length) 法二:order函数默认升序,返回数据下标组成的数组. 可以给向量排序,也可以给数据框排序例:t ...
TCGA学习笔记一(生信技能树概述版)
1.背景介绍重要数据外显子数据表达数据小RNA测序数据拷贝数芯片甲基化数据蛋白质组学数据临床信息癌症背景知识网页工具大全 GDC cbioportal:按照paper来分类的 UC ...

ProTICS包的介绍（根据生信技能树Jimmy老师分享的乳腺癌分子分型包资料整理）