R语言bioconductor包—maftools的使用

maftools包分析和可视化大规模测序研究中的突变注释格式（MAF）文件。该软件包提供了各种功能来执行癌症基因组学中最常用的分析，并以最小的工作量创建功能丰富的可定制可视化。

1. 数据读入，生成MAF对象

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
#
# BiocManager::install("maftools")library(maftools)
# ls("package:maftools")
# 演示数据
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools')
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') laml = read.maf(maf = laml.maf, clinicalData = laml.clin) # MAF对象# 查看MAF对象
getSampleSummary(laml)
getGeneSummary(laml)
getClinicalData(laml)
getFields(laml)
# write.mafSummary(maf = laml, basename = 'laml')# 所有样本中突变统计图
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

2. 取MAF子集

## 取MAF对象子集。tsb：subset by these samples (Tumor Sample Barcodes)
# mafObj = FALSE，返回data.frame
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'), mafObj = FALSE)[1:5]
# 默认mafObj = TRUE，返回MAF object
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933')) subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE,query = "Variant_Classification == 'Splice_Site'")subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE, query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name')laml_m4 = subsetMaf(maf = laml, clinQuery = "FAB_classification %in% 'M4'")# Filter MAF by genes or samples
filterMaf(maf = laml, genes =c("TTN", "AHNAK2"))

3. oncoplot 作图

Usage

oncoplot(maf,top = 20,minMut = NULL,genes = NULL,altered = FALSE,drawRowBar = TRUE,drawColBar = TRUE,leftBarData = NULL,leftBarLims = NULL,rightBarData = NULL,rightBarLims = NULL,topBarData = NULL,logColBar = FALSE,includeColBarCN = TRUE,clinicalFeatures = NULL,annotationColor = NULL,annotationDat = NULL,pathways = NULL,path_order = NULL,selectedPathways = NULL,pwLineCol = "#535c68",pwLineWd = 1,draw_titv = FALSE,titv_col = NULL,showTumorSampleBarcodes = FALSE,barcode_mar = 4,barcodeSrt = 90,gene_mar = 5,anno_height = 1,legend_height = 4,sortByAnnotation = FALSE,groupAnnotationBySize = TRUE,annotationOrder = NULL,sortByMutation = FALSE,keepGeneOrder = FALSE,GeneOrderSort = TRUE,sampleOrder = NULL,additionalFeature = NULL,additionalFeaturePch = 20,additionalFeatureCol = "gray70",additionalFeatureCex = 0.9,genesToIgnore = NULL,removeNonMutated = TRUE,fill = TRUE,cohortSize = NULL,colors = NULL,cBioPortal = FALSE,bgCol = "#CCCCCC",borderCol = "white",annoBorderCol = NA,numericAnnoCol = NULL,drawBox = FALSE,fontSize = 0.8,SampleNamefontSize = 1,titleFontSize = 1.5,legendFontSize = 1.2,annotationFontSize = 1.2,sepwd_genes = 0.5,sepwd_samples = 0.25,writeMatrix = FALSE,colbar_pathway = FALSE,showTitle = TRUE,titleText = NULL,showPct = TRUE
)

# 基因的突变图
oncoplot(maf = laml, top = 20)
oncoplot(maf = laml, draw_titv = TRUE)# 自定义颜色
vc_cols = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(vc_cols) = c('Frame_Shift_Del','Missense_Mutation','Nonsense_Mutation','Multi_Hit','Frame_Shift_Ins','In_Frame_Ins','Splice_Site','In_Frame_Del'
)print(vc_cols)
oncoplot(maf = laml, colors = vc_cols, top = 10)# 加上突变的信号通路
pathways = data.frame(Genes = c("TP53","WT1","PHF6","DNMT3A","DNMT3B","TET1","TET2","IDH1","IDH2","FLT3","KIT","KRAS","NRAS","RUNX1","CEBPA","ASXL1","EZH2","KDM6A"),Pathway = rep(c("TSG", "DNAm", "Signalling", "TFs", "ChromMod"), c(3, 6, 4, 2, 3)),stringsAsFactors = FALSE
)head(pathways)
oncoplot(maf = laml, pathways = pathways)# 加上临床特征
oncoplot(maf = laml, top = 20,clinicalFeatures = "FAB_classification")

4. 其他突变统计分析图

### 1.plotTiTv
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)##plot titv summary:
# 1.contributions of 6 mutational conversion
# 2. Ti/Tv ratios of each sample
plotTiTv(res = laml.titv)### 2. lollipopPlot
## 绘制棒棒糖的氨基酸变化图。蛋白质结构域来源于PFAM数据库。
lollipopPlot(maf = laml,gene = 'DNMT3A',AACol = 'Protein_Change',showMutationRate = TRUE,labelPos = 882, # 只表这个位置的突变:c(882,909),"all"repel = TRUE
)### 3. plotProtein
# 画蛋白结构域图
plotProtein(gene = "TP53")
plotProtein(gene = "TP53",refSeqID = "NM_000546")
plotProtein(gene = "DNMT3A")
plotProtein(gene = "KRAS",showDomainLabel=FALSE)# http://pfam.xfam.org### 4. rainfallPlot
## 降雨图显示基因组突变热点区域。
brca <- system.file("extdata", "brca.maf.gz", package = "maftools")
brca = read.maf(maf = brca, verbose = FALSE)
rainfallPlot(maf = brca, detectChangePoints = TRUE, pointSize = 0.4)### 5. tcgaCompare
## 将输入MAF中的突变负荷与来自MC3项目的所有33个TCGA队列进行比较。
par(fig=c(0,0.8,0,0.8))  #c(left, right,bottom,top)
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML',logscale = TRUE, capture_size = 50)### 6. plotVaf
# 统计基因在各自样本中突变频率的箱线图
plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU')  # i_TumorVAF_WU: 样本名称
plotVaf(maf = laml, genes = "DNMT3A",vafCol = 'i_TumorVAF_WU')### 7. plotCBSsegments
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools")# 画某一样本的拷贝数变异图
# tcga.ab.009.seg文件字段：Sample Chromosome Start End Num_Probes Segment_Mean
par(fig=c(0.01,1,0,1))  #c(left, right,bottom,top)
plotCBSsegments(cbsFile = tcga.ab.009.seg)### 8. somaticInteractions#前25个突变基因的排他/共出现事件统计分析作图。
dev.new()
somaticInteractions(maf = laml, top = 25, pvalue = c(0.05, 0.1))### 9. plotOncodrive
# 基于变异的位置聚类检测癌症驱动基因并作图
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore')
head(laml.sig)
plotOncodrive(res = laml.sig, fdrCutOff = 0.01, useFraction = TRUE, labelSize = 0.5)### 10. pfamDomains
# 突变结构域统计图
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]

5. 联合生存分析作图

# 根据基因突变分组并作生存分析图
mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', isTCGA = TRUE)# 预测与生存相关的基因集
prog_geneset = survGroup(maf = laml, top = 20, geneSetSize = 2, time = "days_to_last_followup", Status = "Overall_Survival_Status", verbose = FALSE)
print(prog_geneset)
# 根据基因集(geneset)突变分组并作生存分析图
mafSurvGroup(maf = laml, geneSet = c("DNMT3A", "FLT3"), time = "days_to_last_followup", Status = "Overall_Survival_Status")

6.比较MAF并作图

###不同MAF对象比较##########
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)# fisher检验两个队列(MAF),以发现差异突变基因。
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'Primary', m2Name = 'Relapse', minMut = 5)
print(pt.vs.rt)
# 不同队列中的基因突变差异森林图。x轴为log10转换优势比，y轴上的差异突变基因。
forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.1)# 不同队列中的基因突变差异图：显示具体突变类型
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE)coBarplot(m1 = primary.apl, m2 = relapse.apl, m1Name = "Primary", m2Name = "Relapse")lollipopPlot2(m1 = primary.apl, m2 = relapse.apl, gene ="PML", AACol1 = "amino_acid_change", AACol2 = "amino_acid_change", m1_name = "Primary", m2_name = "Relapse")
# 不同临床特征分组中基因突变差异比较并作富集图（每一组都和其余比）
fab.ce = clinicalEnrichment(maf = laml, clinicalFeature = 'FAB_classification')
fab.ce$groupwise_comparision[p_value < 0.05]
plotEnrichmentResults(enrich_res = fab.ce, pVal = 0.05)

7. 药物相互作用的突变基因分析

# 队列中和药物的相互作用的突变基因
dgi = drugInteractions(maf = laml, fontSize = 0.75)
length(table(dgi$Gene))
names(table(dgi$Gene))
#dgi[which(dgi$category=="LIPID KINASE"),]dnmt3a.dgi = drugInteractions(genes = "DNMT3A", drugs = TRUE)
#Printing selected columns.
dnmt3a.dgi[,.(Gene, interaction_types, drug_name, drug_claim_name)]

8. 突变基因在致癌通路的富集分析

# 突变基因在已知致癌通路的富集及作图
OncogenicPathways(maf = laml)
PlotOncogenicPathways(maf = laml, pathways = "RTK-RAS")
PlotOncogenicPathways(maf = laml, pathways = "TP53")

9. VAF聚类

#基于变异等位基因频率（VAF）对某一样本的变异进行聚类
#install.packages("mclust")
library("mclust")
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-2972', vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
plotClusters(clusters = tcga.ab.2972.het)seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
dev.new()
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-3009', segFile = seg, vafCol = 'i_TumorVAF_WU')
# 加上拷贝数变异聚类信息
plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE)

10. 突变特征（mutational signatures）分析

### Extract mutational signatures###########
# APOBEC富集样品和非APOBEC富集样品之间的差异并作图:
# 包括突变负荷、tCw基序分布和主要突变基因差异的子图。
library("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)
laml.tnm = trinucleotideMatrix(maf = laml, prefix = 'chr', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg19")
dev.new()
plotApobecDiff(tnm = laml.tnm, maf = laml, pVal = 0.2)#install.packages("NMF")
library('NMF')
laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6, pConstant = 0.1, plotBestFitRes = FALSE, parallel = 2)
plotCophenetic(res = laml.sign)
laml.sig = extractSignatures(mat = laml.tnm, n = 3, pConstant = 0.1, parallel = 2)laml.og30.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "legacy")
#Compate against updated version3 60 signatures
laml.v3.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "SBS")library('pheatmap')
pheatmap::pheatmap(mat = laml.og30.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")pheatmap::pheatmap(mat = laml.v3.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")
maftools::plotSignatures(nmfRes = laml.sig, title_size = 1.2, sig_db = "SBS")

11.其他格式数据转化为MAF对象

# Converts annovar annotations into MAF.
# 参数 MAFobj 默认为FALSE，返回data.frame格式数据
var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', MAFobj = TRUE,tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene')# Converts ICGC Simple Somatic Mutation format file to MAF
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])

12. 读入并处理gistic输出数据

## Read and summarize gistic output
all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools")
amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools")
del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools")
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, gisticScoresFile = scores.gis, isTCGA = TRUE)
class(laml.gistic)   #An object of class  GISTIC # 突出显示扩增和缺失区域片段的基因组图。
gisticChromPlot(gistic = laml.gistic, markBands = "all")
# 样本数对cytobands拷贝数变异显著性（-log10q）的气泡图
gisticBubblePlot(gistic = laml.gistic)# 不同样本拷贝数变异图，类似展示基因突变的oncoplot
# par(fig=c(0,0.2,0.2,0.8))  #c(left, right,bottom,top)
#par(fin=c(5,6))
# par(mai =c(0,0.2,0.2,2))  # 无法设定
gisticOncoPlot(gistic = laml.gistic, clinicalData = getClinicalData(x = laml), clinicalFeatures = 'FAB_classification',gene_mar = 8,barcode_mar = 6,sepwd_genes = 0.5,sepwd_samples = 0.25,sortByAnnotation = TRUE, top = 10)

参考

https://www.bioconductor.org/packages/release/bioc/html/maftools.html

R语言bioconductor包—maftools的使用相关推荐

R语言Bioconductor安装全流程
R语言Bioconductor安装全流程作为一只生物狗,R语言对于生物数据分析真的很重要!如果你翻翻生物专业研究生的朋友圈,你的感觉一定是: 满世界都在学R语言: 满世界都在吐槽R包难装! 下面给大 ...
r语言 bsda包_使用R语言creditmodel包进行Vintage分析或留存率分析
1 什么是vintage分析? Vintage分析(账龄分析法)被广泛应用于信用卡及信贷行业,这个概念起源于葡萄酒,即不同年份出产的葡萄酒的品质有差异,那么不同时期开户或者放款的资产质量也有差异,其核 ...
R语言caret包构建机器学习回归模型（regression model）、使用DALEX包进行模型解释分析、特征重要度、偏依赖分析等
R语言caret包构建机器学习回归模型(regression model).使用DALEX包进行模型解释分析.特征重要度.偏依赖分析等目录
R语言数据包自带数据集之ISwR包的melanom数据集字段解释、数据导入实战
R语言数据包自带数据集之ISwR包的melanom数据集字段解释.数据导入实战目录 R语言数据包自带数据集之ISwR包的melanom数据集字段解释.数据导入实战 #数据字段说明 #导入包 #导入数 ...
R语言数据包自带数据集之survival包的colon数据集字段解释、数据导入实战
R语言数据包自带数据集之survival包的colon数据集字段解释.数据导入实战 #数据字段说明 colon数据集:B/C期结肠癌辅助化疗治疗数据 d # 患者编号 study # 所有患者都是1 ...
R语言数据包自带数据集之survival包的lung数据集字段解释、数据导入实战
R语言数据包自带数据集之survival包的lung数据集字段解释.数据导入实战目录 R语言数据包自带数据集之survival包的lung数据集字段解释.数据导入实战 #数据字段说明 #导入包 #导 ...
R语言数据包自带数据集之ToothGrowth数据集字段解释、数据导入实战
R语言数据包自带数据集之ToothGrowth数据集字段解释.数据导入实战目录 R语言数据包自带数据集之ToothGrowth数据集字段解释.数据导入实战 #数据字段说明 #导入包 #导入数据 #数 ...
R语言数据包自带数据集之mtcars数据集字段解释、数据导入实战
R语言数据包自带数据集之mtcars数据集字段解释.数据导入实战目录 R语言数据包自带数据集之mtcars数据集字段解释.数据导入实战 #会用帮助?或者help函数 #字段说明 #导入包 #导入数据 ...
R语言dplyr包通过数据列的索引重命名数据列实战（Rename Column by Index Position）
R语言dplyr包通过数据列的索引重命名数据列实战(Rename Column by Index Position) 目录 R语言dplyr包通过数据列的索引重命名数据列实战(Rename Colum ...

R语言bioconductor包—maftools的使用

1. 数据读入，生成MAF对象

2. 取MAF子集

3. oncoplot 作图

Usage

4. 其他突变统计分析图

5. 联合生存分析作图

6.比较MAF并作图

7. 药物相互作用的突变基因分析

8. 突变基因在致癌通路的富集分析

9. VAF聚类

10. 突变特征（mutational signatures）分析

11.其他格式数据转化为MAF对象

12. 读入并处理gistic输出数据

R语言bioconductor包—maftools的使用相关推荐

最新文章

热门文章