CIBERSORT可以从bulk 基因表达数据估计混合细胞群中成员细胞类型的丰度。

CIBERSORT.R 代码

# CIBERSORT R script v1.04 (last updated 10-24-2016)
# Note: Signature matrix construction is not currently available; use java version for full functionality.
# Author: Aaron M. Newman, Stanford University (amnewman@stanford.edu)
# Requirements:
#       R v3.0 or later. (dependencies below might not work properly with earlier versions)
#       install.packages('e1071')
#       install.pacakges('parallel')
#       install.packages('preprocessCore')
#       if preprocessCore is not available in the repositories you have selected, run the following:
#           source("http://bioconductor.org/biocLite.R")
#           biocLite("preprocessCore")
# Windows users using the R GUI may need to Run as Administrator to install or update packages.
# This script uses 3 parallel processes.  Since Windows does not support forking, this script will run
# single-threaded in Windows.
#
# Usage:
#       Navigate to directory containing R script
#
#   In R:
#       source('CIBERSORT.R')
#       results <- CIBERSORT('sig_matrix_file.txt','mixture_file.txt', perm, QN, absolute, abs_method)
#
#       Options:
#       i)   perm = No. permutations; set to >=100 to calculate p-values (default = 0)
#       ii)  QN = Quantile normalization of input mixture (default = TRUE)
#       iii) absolute = Run CIBERSORT in absolute mode (default = FALSE)
#               - note that cell subsets will be scaled by their absolute levels and will not be
#                 represented as fractions (to derive the default output, normalize absolute
#                 levels such that they sum to 1 for each mixture sample)
#               - the sum of all cell subsets in each mixture sample will be added to the ouput
#                 ('Absolute score'). If LM22 is used, this score will capture total immune content.
#       iv)  abs_method = if absolute is set to TRUE, choose method: 'no.sumto1' or 'sig.score'
#               - sig.score = for each mixture sample, define S as the median expression
#                 level of all genes in the signature matrix divided by the median expression
#                 level of all genes in the mixture. Multiple cell subset fractions by S.
#               - no.sumto1 = remove sum to 1 constraint
#
# Input: signature matrix and mixture file, formatted as specified at http://cibersort.stanford.edu/tutorial.php
# Output: matrix object containing all results and tabular data written to disk 'CIBERSORT-Results.txt'
# License: http://cibersort.stanford.edu/CIBERSORT_License.txt#Core algorithm
CoreAlg <- function(X, y, absolute, abs_method){#try different values of nusvn_itor <- 3res <- function(i){if(i==1){nus <- 0.25}if(i==2){nus <- 0.5}if(i==3){nus <- 0.75}model<-svm(X,y,type="nu-regression",kernel="linear",nu=nus,scale=F)model}if(Sys.info()['sysname'] == 'Windows') out <- mclapply(1:svn_itor, res, mc.cores=1) elseout <- mclapply(1:svn_itor, res, mc.cores=svn_itor)nusvm <- rep(0,svn_itor)corrv <- rep(0,svn_itor)#do cibersortt <- 1while(t <= svn_itor) {weights = t(out[[t]]$coefs) %*% out[[t]]$SVweights[which(weights<0)]<-0w<-weights/sum(weights)u <- sweep(X,MARGIN=2,w,'*')k <- apply(u, 1, sum)nusvm[t] <- sqrt((mean((k - y)^2)))corrv[t] <- cor(k, y)t <- t + 1}#pick best modelrmses <- nusvmmn <- which.min(rmses)model <- out[[mn]]#get and normalize coefficientsq <- t(model$coefs) %*% model$SVq[which(q<0)]<-0if(!absolute || abs_method == 'sig.score') w <- (q/sum(q)) #relative space (returns fractions)if(absolute && abs_method == 'no.sumto1') w <- q #absolute space (returns scores)mix_rmse <- rmses[mn]mix_r <- corrv[mn]newList <- list("w" = w, "mix_rmse" = mix_rmse, "mix_r" = mix_r)}#do permutations
doPerm <- function(perm, X, Y, absolute, abs_method){itor <- 1Ylist <- as.list(data.matrix(Y))dist <- matrix()while(itor <= perm){#print(itor)#random mixtureyr <- as.numeric(Ylist[sample(length(Ylist),dim(X)[1])])#standardize mixtureyr <- (yr - mean(yr)) / sd(yr)#run CIBERSORT core algorithmresult <- CoreAlg(X, yr, absolute, abs_method)mix_r <- result$mix_r#store correlationif(itor == 1) {dist <- mix_r}else {dist <- rbind(dist, mix_r)}itor <- itor + 1}newList <- list("dist" = dist)
}#main function
CIBERSORT <- function(sig_matrix, mixture_file, perm=0, QN=TRUE, absolute=FALSE, abs_method='sig.score'){#dependenciesrequire(e1071)require(parallel)require(preprocessCore)if(absolute && abs_method != 'no.sumto1' && abs_method != 'sig.score') stop("abs_method must be set to either 'sig.score' or 'no.sumto1'")#read in dataX <- read.table(sig_matrix,header=T,sep="\t",row.names=1,check.names=F)Y <- read.table(mixture_file, header=T, sep="\t",check.names=F)#to prevent crashing on duplicated gene symbols, add unique numbers to identical namesdups <- dim(Y)[1] - length(unique(Y[,1]))if(dups > 0) {warning(paste(dups," duplicated gene symbol(s) found in mixture file!",sep=""))rownames(Y) <- make.names(Y[,1], unique=TRUE)}else {rownames(Y) <- Y[,1]}Y <- Y[,-1]X <- data.matrix(X)Y <- data.matrix(Y)#orderX <- X[order(rownames(X)),]Y <- Y[order(rownames(Y)),]P <- perm #number of permutations#anti-log if max < 50 in mixture fileif(max(Y) < 50) {Y <- 2^Y}#quantile normalization of mixture fileif(QN == TRUE){tmpc <- colnames(Y)tmpr <- rownames(Y)Y <- normalize.quantiles(Y)colnames(Y) <- tmpcrownames(Y) <- tmpr}#store original mixturesYorig <- YYmedian <- max(median(Yorig),1)#intersect genesXgns <- row.names(X)Ygns <- row.names(Y)YintX <- Ygns %in% XgnsY <- Y[YintX,]XintY <- Xgns %in% row.names(Y)X <- X[XintY,]#standardize sig matrixX <- (X - mean(X)) / sd(as.vector(X))#empirical null distribution of correlation coefficientsif(P > 0) {nulldist <- sort(doPerm(P, X, Y, absolute, abs_method)$dist)}header <- c('Mixture',colnames(X),"P-value","Correlation","RMSE")if(absolute) header <- c(header, paste('Absolute score (',abs_method,')',sep=""))output <- matrix()itor <- 1mixtures <- dim(Y)[2]pval <- 9999#iterate through mixtureswhile(itor <= mixtures){y <- Y[,itor]#standardize mixturey <- (y - mean(y)) / sd(y)#run SVR core algorithmresult <- CoreAlg(X, y, absolute, abs_method)#get resultsw <- result$wmix_r <- result$mix_rmix_rmse <- result$mix_rmseif(absolute && abs_method == 'sig.score') {w <- w * median(Y[,itor]) / Ymedian}#calculate p-valueif(P > 0) {pval <- 1 - (which.min(abs(nulldist - mix_r)) / length(nulldist))}#print outputout <- c(colnames(Y)[itor],w,pval,mix_r,mix_rmse)if(absolute) out <- c(out, sum(w))if(itor == 1) {output <- out}else {output <- rbind(output, out)}itor <- itor + 1}#save resultswrite.table(rbind(header,output), file="CIBERSORT-Results.txt", sep="\t", row.names=F, col.names=F, quote=F)#return matrix object containing all resultsobj <- rbind(header,output)obj <- obj[,-1]obj <- obj[-1,]obj <- matrix(as.numeric(unlist(obj)),nrow=nrow(obj))rownames(obj) <- colnames(Y)if(!absolute){colnames(obj) <- c(colnames(X),"P-value","Correlation","RMSE")}else{colnames(obj) <- c(colnames(X),"P-value","Correlation","RMSE",paste('Absolute score (',abs_method,')',sep=""))}obj
}

1. 载入CIBERSORT.R

setwd("/test")
source("CIBERSORT.R") # 从文件读取R代码, 在工作目录下 

2. 读入处理表达谱数据

如果已经log2转化了,CIBERSORT自带anti-log2可以转换回来。

表达矩阵一般要是

  • TPM-normalized
  • not log-transformed.
exp_df <- read.csv("ExampleMixtures-GEPs.csv",header=T,check.names=F)
head(exp_df)
# 整理一下行名,列名
rownames(exp_df)=exp_df[,1]
exp_df=exp_df[,2:ncol(exp_df)]### 转化为表达矩阵
data <- as.matrix(exp_df)
#class(data)
#typeof(data)# Transform count data to log2-counts per million (logCPM),
# estimate the mean-variance relationship and use this to compute appropriate observation-level weights.
# The data are then ready for linear modelling.library(limma)
v <-voom(data, plot = F, save.plot = F)
out=v$E  # "matrix" "array"
head(out)
class(out)
typeof(out)
write.table(out,file="query_expr_ready.txt",sep="\t")#把准备输入CIBERSORT的数据保存一下out=rbind(ID=colnames(out),out)write.table(out,file="query_expr_ready.txt",sep="\t",quote=F,col.names=F)  

3. 免疫细胞组成分析

# ## LM22.csv文件转为LM22.txt输入文件
# LM22 <- read.csv("LM22.csv")
# head(LM22[,1:3])
# rownames(LM22) <- LM22$GeneSymbol
# LM22 <- LM22[,-1]
# write.table(LM22,"LM22.txt",sep="\t")results=CIBERSORT("LM22.txt", "query_expr_ready.txt", perm=100, QN=TRUE)
# 保存结果文件
write.csv(results,"CIBERSORT_Output.csv")

4. 结果可视化

#批量调包
# pkgs <- c("matrixStats", "pheatmap", "RColorBrewer",
#           "tidyverse", "cowplot","ggpubr","bslib","ggthemes")
# lapply(pkgs, library, character.only = T)# Read in results
cibersort_raw <- read.csv("CIBERSORT_Output.csv",row.names = 1,header = T)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(ggpubr) # head(cibersort_raw)
# dim(cibersort_raw)
# colnames(cibersort_raw)
Composition_df <- cibersort_raw %>%rownames_to_column("sample") %>%pivot_longer(cols = 2:23, # 22种细胞类型names_to = "CellType",values_to = "Composition")sub_comp <- Composition_df[,c(5,1,6)] #sample,P.value,Correlation,RMSE,CellType,Composition       ## 免疫细胞在所有样本中的boxplot箱图
ggboxplot(sub_comp,x = "CellType",y = "Composition",color = "black",fill = "CellType",xlab = "",ylab = "Cell composition",main = "TME Cell composition") +theme(axis.text.x = element_text(angle = 90,hjust = 1,vjust = 1))          ## 每个样本中免疫细胞的组成比例图
ggbarplot(sub_comp,x = "sample",y = "Composition",size = 1,fill = "CellType",color = "CellType") +theme(axis.text.x = element_text(angle = 90,hjust = 1,vjust = 1,size = 10),legend.position = "bottom")

参考:

CIBERSORTx数据上传格式_weixin_43364556的博客-CSDN博客_cibersortx怎么用

CIBERSORT 学习笔记_菠萝西斯的博客-CSDN博客_cibersort算法

https://github.com/zomithex/CIBERSORT

CIBERSORT进行免疫细胞组成分析相关推荐

  1. 利用CIBERSORT免疫细胞类群分析详细教程

    利用CIBERSORT免疫细胞类群分析详细教程 查看全文 http://www.taodudu.cc/news/show-4326345.html 相关文章: 新冠免疫细胞培养.转染.核酸分析整合解决 ...

  2. CIBERSORT计算免疫细胞丰度

    我也是第一次做cibersort分析,刚好看见前辈们写的教程特别好,所以自己总结一下我所需要的,写成教程主要是方便自己. cibersort计算免疫细胞丰度总共只需要一个函数+一行代码,主要工作在于文 ...

  3. 肿瘤浸润免疫细胞量化分析简介

    欢迎关注"生信修炼手册"! 肿瘤微环境是肿瘤细胞生存和发展的土壤,其中浸润到肿瘤局部的免疫细胞介导了肿瘤免疫微环境,tumor immune microenvironment, 简 ...

  4. 生信工具 | TCGA数据分析工具GEPIA最新更新,用于免疫细胞浸润分析

    GEPIA(http://gepia.cancer-pku.cn/index.html)这个工具可以说是分析TCGA数据库数据分析工具中比较简单好用的工具了,包括生存分析,表达差异分析,相关性分析等, ...

  5. Cibersort免疫浸润的在线分析及R语言代码实现

    上期展示了ESITMATE(基于转录组数据)计算免疫得分和肿瘤纯度的一个例子,详见ggplot2实现分半小提琴图绘制基因表达谱和免疫得分.实际上计算肿瘤纯度的方法还有InfiniumPurify(基于 ...

  6. 不会linux也没关系,点击鼠标即可完成的LDSC分析来了

    欢迎关注"生信修炼手册"! LDSC分析基于已有的GWAS结果,即gwas summary数据,可以评估性状的遗传力,分析两个性状间的遗传相似度.相比GREML, 其运算速度快,更 ...

  7. 想要进行gene prioritization分析,请看这里!

    欢迎关注"生信修炼手册"! 通过GWAS分析可以识别到与性状关联的SNP位点,然而从生物学角度出发,我们更想了解的是哪些基因或者通路导致了这些位点与性状的关联现象.为了解决这一问题 ...

  8. GWAS中的Gene-Gene Interactions如何分析?看这里

    欢迎关注"生信修炼手册"! 在遗传学中,当两个基因相互作用然后导致对应性状的出现,说明两个基因间存在相互作用.在之前的文章中,介绍了很多的基因相互作用模型,列表如下 互补作用 积加 ...

  9. 使用MatrixEQTL进行cis/trans-eQTL分析

    欢迎关注"生信修炼手册"! Matrix是一款经典的eQTL分析软件,可以支持cis和trans-eQTL的分析,官网如下 http://www.bios.unc.edu/rese ...

最新文章

  1. 招聘|腾讯地图平台部招点云算法工程师
  2. Centos7安装Elasticsearch
  3. E431 笔记本电池问题 0190 Critical low-battery error 解决办法
  4. rxjs of操作符传入数组的单步执行
  5. HDU 2049 不容易系列之(4)——考新郎( 错排 )
  6. 夏季快速入睡的7个妙招
  7. 选课 topsort
  8. python 2.7.10 找不到 libmysqlclient.18.dylib 解决方案
  9. atcoder 2017Code festival C ——D题 Yet Another Palindrome Partitioning(思维+dp)
  10. 构造函数强制使用new
  11. 【hadoop】进阶篇一:MapReduce之Job的提交
  12. windows 编程的学习次序
  13. 基于matlab的动态心形图案
  14. 获取指定年月的月初跟月末的时间戳
  15. SOCKS5实现代理服务器(C++)
  16. C++核心准则​NL:命名和布局规则
  17. skywalking了解及搭建使用
  18. 如何把Iconfont阿里巴巴矢量图标库引入web项目和微信小程序中,拿走不谢
  19. Docker镜像与容器的工作原理
  20. Python实现多个excel文件合并源码及打包exe程序

热门文章

  1. 让技术Leader狂点赞的Linux速成手册,到底有多强悍?
  2. Android 摄像头拍照显示 相册显示 图片裁剪绘制显示
  3. ubuntu加了张固态_UbuntuToGo——打造属于自己的移动固态热插拔Liniux操作系统
  4. 为什么Java有GC调优而没听说过有CLR的GC调优?
  5. (附源码)springboot工作计划管理软件 毕业设计 181638
  6. workerman创建wss服务
  7. 2019北京大学研究生推免上机考试
  8. 闲聊: 女神异闻录4
  9. 程序员怒批996背后的支持者,刘强东和马云哑口无言!
  10. 面向对象概念及对象、抽象、类的解释