maftools包分析和可视化大规模测序研究中的突变注释格式(MAF)文件。该软件包提供了各种功能来执行癌症基因组学中最常用的分析,并以最小的工作量创建功能丰富的可定制可视化。

1. 数据读入,生成MAF对象

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
#
# BiocManager::install("maftools")library(maftools)
# ls("package:maftools")
# 演示数据
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools')
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') laml = read.maf(maf = laml.maf, clinicalData = laml.clin) # MAF对象# 查看MAF对象
getSampleSummary(laml)
getGeneSummary(laml)
getClinicalData(laml)
getFields(laml)
# write.mafSummary(maf = laml, basename = 'laml')# 所有样本中突变统计图
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

2. 取MAF子集

## 取MAF对象子集。tsb:subset by these samples (Tumor Sample Barcodes)
# mafObj = FALSE,返回data.frame
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'), mafObj = FALSE)[1:5]
# 默认mafObj = TRUE,返回MAF object
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933')) subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE,query = "Variant_Classification == 'Splice_Site'")subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE, query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name')laml_m4 = subsetMaf(maf = laml, clinQuery = "FAB_classification %in% 'M4'")# Filter MAF by genes or samples
filterMaf(maf = laml, genes =c("TTN", "AHNAK2"))

3. oncoplot 作图

Usage

oncoplot(maf,top = 20,minMut = NULL,genes = NULL,altered = FALSE,drawRowBar = TRUE,drawColBar = TRUE,leftBarData = NULL,leftBarLims = NULL,rightBarData = NULL,rightBarLims = NULL,topBarData = NULL,logColBar = FALSE,includeColBarCN = TRUE,clinicalFeatures = NULL,annotationColor = NULL,annotationDat = NULL,pathways = NULL,path_order = NULL,selectedPathways = NULL,pwLineCol = "#535c68",pwLineWd = 1,draw_titv = FALSE,titv_col = NULL,showTumorSampleBarcodes = FALSE,barcode_mar = 4,barcodeSrt = 90,gene_mar = 5,anno_height = 1,legend_height = 4,sortByAnnotation = FALSE,groupAnnotationBySize = TRUE,annotationOrder = NULL,sortByMutation = FALSE,keepGeneOrder = FALSE,GeneOrderSort = TRUE,sampleOrder = NULL,additionalFeature = NULL,additionalFeaturePch = 20,additionalFeatureCol = "gray70",additionalFeatureCex = 0.9,genesToIgnore = NULL,removeNonMutated = TRUE,fill = TRUE,cohortSize = NULL,colors = NULL,cBioPortal = FALSE,bgCol = "#CCCCCC",borderCol = "white",annoBorderCol = NA,numericAnnoCol = NULL,drawBox = FALSE,fontSize = 0.8,SampleNamefontSize = 1,titleFontSize = 1.5,legendFontSize = 1.2,annotationFontSize = 1.2,sepwd_genes = 0.5,sepwd_samples = 0.25,writeMatrix = FALSE,colbar_pathway = FALSE,showTitle = TRUE,titleText = NULL,showPct = TRUE
)
# 基因的突变图
oncoplot(maf = laml, top = 20)
oncoplot(maf = laml, draw_titv = TRUE)# 自定义颜色
vc_cols = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(vc_cols) = c('Frame_Shift_Del','Missense_Mutation','Nonsense_Mutation','Multi_Hit','Frame_Shift_Ins','In_Frame_Ins','Splice_Site','In_Frame_Del'
)print(vc_cols)
oncoplot(maf = laml, colors = vc_cols, top = 10)# 加上突变的信号通路
pathways = data.frame(Genes = c("TP53","WT1","PHF6","DNMT3A","DNMT3B","TET1","TET2","IDH1","IDH2","FLT3","KIT","KRAS","NRAS","RUNX1","CEBPA","ASXL1","EZH2","KDM6A"),Pathway = rep(c("TSG", "DNAm", "Signalling", "TFs", "ChromMod"), c(3, 6, 4, 2, 3)),stringsAsFactors = FALSE
)head(pathways)
oncoplot(maf = laml, pathways = pathways)# 加上临床特征
oncoplot(maf = laml, top = 20,clinicalFeatures = "FAB_classification")

4. 其他突变统计分析图

### 1.plotTiTv
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)##plot titv summary:
# 1.contributions of 6 mutational conversion
# 2. Ti/Tv ratios of each sample
plotTiTv(res = laml.titv)### 2. lollipopPlot
## 绘制棒棒糖的氨基酸变化图。蛋白质结构域来源于PFAM数据库。
lollipopPlot(maf = laml,gene = 'DNMT3A',AACol = 'Protein_Change',showMutationRate = TRUE,labelPos = 882, # 只表这个位置的突变:c(882,909),"all"repel = TRUE
)### 3. plotProtein
# 画蛋白结构域图
plotProtein(gene = "TP53")
plotProtein(gene = "TP53",refSeqID = "NM_000546")
plotProtein(gene = "DNMT3A")
plotProtein(gene = "KRAS",showDomainLabel=FALSE)# http://pfam.xfam.org### 4. rainfallPlot
## 降雨图显示基因组突变热点区域。
brca <- system.file("extdata", "brca.maf.gz", package = "maftools")
brca = read.maf(maf = brca, verbose = FALSE)
rainfallPlot(maf = brca, detectChangePoints = TRUE, pointSize = 0.4)### 5. tcgaCompare
## 将输入MAF中的突变负荷与来自MC3项目的所有33个TCGA队列进行比较。
par(fig=c(0,0.8,0,0.8))  #c(left, right,bottom,top)
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML',logscale = TRUE, capture_size = 50)### 6. plotVaf
# 统计基因在各自样本中突变频率的箱线图
plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU')  # i_TumorVAF_WU: 样本名称
plotVaf(maf = laml, genes = "DNMT3A",vafCol = 'i_TumorVAF_WU')### 7. plotCBSsegments
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools")# 画某一样本的拷贝数变异图
# tcga.ab.009.seg文件字段:Sample Chromosome Start End Num_Probes Segment_Mean
par(fig=c(0.01,1,0,1))  #c(left, right,bottom,top)
plotCBSsegments(cbsFile = tcga.ab.009.seg)### 8. somaticInteractions#前25个突变基因的排他/共出现事件统计分析作图。
dev.new()
somaticInteractions(maf = laml, top = 25, pvalue = c(0.05, 0.1))### 9. plotOncodrive
# 基于变异的位置聚类检测癌症驱动基因并作图
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore')
head(laml.sig)
plotOncodrive(res = laml.sig, fdrCutOff = 0.01, useFraction = TRUE, labelSize = 0.5)### 10. pfamDomains
# 突变结构域统计图
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]

5. 联合生存分析作图

# 根据基因突变分组并作生存分析图
mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', isTCGA = TRUE)# 预测与生存相关的基因集
prog_geneset = survGroup(maf = laml, top = 20, geneSetSize = 2, time = "days_to_last_followup", Status = "Overall_Survival_Status", verbose = FALSE)
print(prog_geneset)
# 根据基因集(geneset)突变分组并作生存分析图
mafSurvGroup(maf = laml, geneSet = c("DNMT3A", "FLT3"), time = "days_to_last_followup", Status = "Overall_Survival_Status")

6.比较MAF并作图

###不同MAF对象比较##########
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)# fisher检验两个队列(MAF),以发现差异突变基因。
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'Primary', m2Name = 'Relapse', minMut = 5)
print(pt.vs.rt)
# 不同队列中的基因突变差异森林图。x轴为log10转换优势比,y轴上的差异突变基因。
forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.1)# 不同队列中的基因突变差异图:显示具体突变类型
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE)coBarplot(m1 = primary.apl, m2 = relapse.apl, m1Name = "Primary", m2Name = "Relapse")lollipopPlot2(m1 = primary.apl, m2 = relapse.apl, gene ="PML", AACol1 = "amino_acid_change", AACol2 = "amino_acid_change", m1_name = "Primary", m2_name = "Relapse")
# 不同临床特征分组中基因突变差异比较并作富集图(每一组都和其余比)
fab.ce = clinicalEnrichment(maf = laml, clinicalFeature = 'FAB_classification')
fab.ce$groupwise_comparision[p_value < 0.05]
plotEnrichmentResults(enrich_res = fab.ce, pVal = 0.05)

7. 药物相互作用的突变基因分析

# 队列中和药物的相互作用的突变基因
dgi = drugInteractions(maf = laml, fontSize = 0.75)
length(table(dgi$Gene))
names(table(dgi$Gene))
#dgi[which(dgi$category=="LIPID KINASE"),]dnmt3a.dgi = drugInteractions(genes = "DNMT3A", drugs = TRUE)
#Printing selected columns.
dnmt3a.dgi[,.(Gene, interaction_types, drug_name, drug_claim_name)]

8. 突变基因在致癌通路的富集分析

# 突变基因在已知致癌通路的富集及作图
OncogenicPathways(maf = laml)
PlotOncogenicPathways(maf = laml, pathways = "RTK-RAS")
PlotOncogenicPathways(maf = laml, pathways = "TP53")

9. VAF聚类

#基于变异等位基因频率(VAF)对某一样本的变异进行聚类
#install.packages("mclust")
library("mclust")
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-2972', vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
plotClusters(clusters = tcga.ab.2972.het)seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
dev.new()
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-3009', segFile = seg, vafCol = 'i_TumorVAF_WU')
# 加上拷贝数变异聚类信息
plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE)

10. 突变特征(mutational signatures)分析

### Extract mutational signatures###########
# APOBEC富集样品和非APOBEC富集样品之间的差异并作图:
# 包括突变负荷、tCw基序分布和主要突变基因差异的子图。
library("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)
laml.tnm = trinucleotideMatrix(maf = laml, prefix = 'chr', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg19")
dev.new()
plotApobecDiff(tnm = laml.tnm, maf = laml, pVal = 0.2)#install.packages("NMF")
library('NMF')
laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6, pConstant = 0.1, plotBestFitRes = FALSE, parallel = 2)
plotCophenetic(res = laml.sign)
laml.sig = extractSignatures(mat = laml.tnm, n = 3, pConstant = 0.1, parallel = 2)laml.og30.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "legacy")
#Compate against updated version3 60 signatures
laml.v3.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "SBS")library('pheatmap')
pheatmap::pheatmap(mat = laml.og30.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")pheatmap::pheatmap(mat = laml.v3.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")
maftools::plotSignatures(nmfRes = laml.sig, title_size = 1.2, sig_db = "SBS")

11.其他格式数据转化为MAF对象

# Converts annovar annotations into MAF.
# 参数 MAFobj 默认为FALSE,返回data.frame格式数据
var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', MAFobj = TRUE,tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene')# Converts ICGC Simple Somatic Mutation format file to MAF
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])

12. 读入并处理gistic输出数据

## Read and summarize gistic output
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, gisticScoresFile = scores.gis, isTCGA = TRUE)
class(laml.gistic)   #An object of class  GISTIC # 突出显示扩增和缺失区域片段的基因组图。
gisticChromPlot(gistic = laml.gistic, markBands = "all")
# 样本数对cytobands拷贝数变异显著性(-log10q)的气泡图
gisticBubblePlot(gistic = laml.gistic)# 不同样本拷贝数变异图,类似展示基因突变的oncoplot
# par(fig=c(0,0.2,0.2,0.8))  #c(left, right,bottom,top)
#par(fin=c(5,6))
# par(mai =c(0,0.2,0.2,2))  # 无法设定
gisticOncoPlot(gistic = laml.gistic, clinicalData = getClinicalData(x = laml), clinicalFeatures = 'FAB_classification',gene_mar = 8,barcode_mar = 6,sepwd_genes = 0.5,sepwd_samples = 0.25,sortByAnnotation = TRUE, top = 10)

参考

https://www.bioconductor.org/packages/release/bioc/html/maftools.html

R语言bioconductor包—maftools的使用相关推荐

  1. R语言Bioconductor安装全流程

    R语言Bioconductor安装全流程 作为一只生物狗,R语言对于生物数据分析真的很重要!如果你翻翻生物专业研究生的朋友圈,你的感觉一定是: 满世界都在学R语言: 满世界都在吐槽R包难装! 下面给大 ...

  2. r语言 bsda包_使用R语言creditmodel包进行Vintage分析或留存率分析

    1 什么是vintage分析? Vintage分析(账龄分析法)被广泛应用于信用卡及信贷行业,这个概念起源于葡萄酒,即不同年份出产的葡萄酒的品质有差异,那么不同时期开户或者放款的资产质量也有差异,其核 ...

  3. R语言caret包构建机器学习回归模型(regression model)、使用DALEX包进行模型解释分析、特征重要度、偏依赖分析等

    R语言caret包构建机器学习回归模型(regression model).使用DALEX包进行模型解释分析.特征重要度.偏依赖分析等 目录

  4. R语言数据包自带数据集之ISwR包的melanom数据集字段解释、数据导入实战

    R语言数据包自带数据集之ISwR包的melanom数据集字段解释.数据导入实战 目录 R语言数据包自带数据集之ISwR包的melanom数据集字段解释.数据导入实战 #数据字段说明 #导入包 #导入数 ...

  5. R语言数据包自带数据集之survival包的colon数据集字段解释、数据导入实战

    R语言数据包自带数据集之survival包的colon数据集字段解释.数据导入实战 #数据字段说明 colon数据集:B/C期结肠癌辅助化疗治疗数据 d # 患者编号 study # 所有患者都是1 ...

  6. R语言数据包自带数据集之survival包的lung数据集字段解释、数据导入实战

    R语言数据包自带数据集之survival包的lung数据集字段解释.数据导入实战 目录 R语言数据包自带数据集之survival包的lung数据集字段解释.数据导入实战 #数据字段说明 #导入包 #导 ...

  7. R语言数据包自带数据集之ToothGrowth数据集字段解释、数据导入实战

    R语言数据包自带数据集之ToothGrowth数据集字段解释.数据导入实战 目录 R语言数据包自带数据集之ToothGrowth数据集字段解释.数据导入实战 #数据字段说明 #导入包 #导入数据 #数 ...

  8. R语言数据包自带数据集之mtcars数据集字段解释、数据导入实战

    R语言数据包自带数据集之mtcars数据集字段解释.数据导入实战 目录 R语言数据包自带数据集之mtcars数据集字段解释.数据导入实战 #会用帮助?或者help函数 #字段说明 #导入包 #导入数据 ...

  9. R语言dplyr包通过数据列的索引重命名数据列实战(Rename Column by Index Position)

    R语言dplyr包通过数据列的索引重命名数据列实战(Rename Column by Index Position) 目录 R语言dplyr包通过数据列的索引重命名数据列实战(Rename Colum ...

最新文章

  1. String内存分配
  2. Spark:使用partitionColumn选项读取数据库原理
  3. 从零开始的全栈工程师——html篇1.6
  4. 10-fold Cross Validation
  5. java9之后,String为何从char类型数组转成byte类型数组
  6. HTTP有哪些特点?
  7. 只需45秒,用Python给故宫画一组雪景手绘图
  8. Excel将多行带空格的数据插入到表格中
  9. 德州达拉斯大学计算机录取要求,德克萨斯大学达拉斯分校申请条件(德克萨斯大...
  10. 单元测试、集成测试、系统测试、验收测试
  11. 计算机和用户账户名一样,求计算机账户与用户账户的区别与联系?
  12. CH341a烧录器烧录华硕BIOS
  13. 防抖(debounce) 和 节流(throttling)的封装使用-最终发布npm
  14. 机械手臂c语言如何编程,工业机械手臂程序示教图文教程
  15. 记账的目的和好处是什么
  16. matlab求解非线性常微分方程组,求一道用matlab编程解非线性方程组
  17. 各个公司项目阶段划分
  18. return的用法是什么?
  19. 最新开源版知音QQ助手V1.0.16版本
  20. DVWA全关教程手册

热门文章

  1. MySQL数据库中的MyISAM和InnoDB存储引擎对比
  2. ubuntu16.04 sudo apt-get update解决Hash sum错误
  3. java facets_Java UIComponent.getFacets方法代码示例
  4. springboot 主键重复导致数据重复_Springboot实现防重复提交和防重复点击(附源码)...
  5. TPAMI 2022 | 国防科大等高校提出光场解耦机制,在超分辨与视差估计任务上取得优异性能...
  6. 一分钟详解initUndistortRectifyMap函数bug修复方法
  7. QT+VS打包发布流程该怎么做?
  8. mysql中拼接用什么_MySQL中常用的拼接语句的小结(代码示例)
  9. 回顾Java课本容易遗忘的知识(一)
  10. Java线程优先级的概念