splatter包生成单细胞RNA测序数据
Splatter是一个模拟单细胞RNA测序计数数据的软件包。它提供了一个简单的界面,用于创建可复制且文档充分的复杂模拟。可以从真实数据估计参数,并提供用于比较真实数据集和模拟数据集的函数。
# One-time installation from Bioconductor (uncomment to run):
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("splatter")

# Attach the packages used throughout this script. The library(splatter) call
# must be on its own line; when it trails the commented-out install command it
# becomes part of that comment and splatter is never loaded.
library(splatter)
library(scater)

### 1. Quickly create mock data
# Signature, for reference:
#   mockSCE(ncells = 200, ngenes = 2000, nspikes = 100)
# Seed the RNG first so the mock data set is reproducible.
set.seed(1)
sce <- mockSCE()  # a SingleCellExperiment object
counts(sce)       # print its count matrix

### 2. Parameter estimation
# Estimate simulation parameters from a data set (either simulated data or a
# real single-cell sequencing data set).
params <- splatEstimate(sce)  # a Params object

### 3. Custom parameters and simulation
# Create a parameter object (defaults: 10000 genes, 100 cells).
params <- newSplatParams()

# Inspect one parameter at a time, or several at once.
getParam(params, "nGenes")
getParam(params, "nCells")
getParams(params, c("nGenes", "mean.rate", "mean.shape"))

# Set a single parameter.
params <- setParam(params, "nGenes", 5000)
# nCells is derived from batchCells and cannot be set directly:
# setParam(params, "nCells", 50) stops with an error. Change the number of
# cells through batchCells instead. It is kept at 100 here so that the
# simulated matrix below stays at 8000 genes x 100 cells.
params <- setParam(params, "batchCells", 100)

# Set several parameters at once, either as named arguments or through a list
# passed as `update`.
params <- setParams(params, mean.shape = 0.5, de.prob = 0.2)
params <- setParams(params, update = list(nGenes = 8000, mean.rate = 0.5))

# Simulate a data set from the parameter object.
sim <- splatSimulate(params)

### 4. Extract information from the SingleCellExperiment object
class(sim)
# Information about genes
head(rowData(sim))
# Information about cells
head(colData(sim))
# Gene by cell matrices
names(assays(sim))
# Example of cell means matrix
assays(sim)$CellMeans[1:5, 1:5]

# Expression (count) matrix. Each inspection call sits on its own line; two
# calls written back to back on one line are a syntax error.
counts <- counts(sim)
counts[1:3, 1:5]
class(counts)   # "matrix" "array"
typeof(counts)  # "integer"
dim(counts)     # 8000 100

### 5. Normalisation, log-transformation and dimensionality reduction
## logNormCounts() comes from {scuttle}; runPCA() and plotPCA() from {scater}.
# Compute log-transformed normalised expression values from the counts stored
# in the SingleCellExperiment object.
sim <- logNormCounts(sim)
counts(sim)[seq_len(3), seq_len(5)]
logcounts(sim)[seq_len(3), seq_len(5)]

# Dimensionality reduction with PCA, then plot the result.
sim <- runPCA(sim)
plotPCA(sim)

### 6. Create more complex simulations
## groups
# Simulate groups of cells with group probabilities 0.3 and 0.7.
sim.groups <- splatSimulate(
  group.prob = c(0.3, 0.7),
  method = "groups",
  verbose = FALSE
)
sim.groups <- logNormCounts(sim.groups)
sim.groups <- runPCA(sim.groups)
plotPCA(sim.groups, colour_by = "Group")
# The plot call and the inspection calls below must be separate statements;
# fused onto one line they do not parse.
dim(counts(sim.groups))
rowData(sim.groups)$DEFacGroup1
rowData(sim.groups)$DEFacGroup2
metadata(sim.groups)

## paths
# Simulate cells along a path; cells are coloured by their "Step" value.
sim.paths <- splatSimulate(
  de.prob = 0.2,
  nGenes = 1000,
  method = "paths",
  verbose = FALSE
)
sim.paths <- logNormCounts(sim.paths)
sim.paths <- runPCA(sim.paths)
plotPCA(sim.paths, colour_by = "Step")
colData(sim.paths)$Step

## batches
# Simulate two batches of 50 cells each.
sim.batches <- splatSimulate(batchCells = c(50, 50), verbose = FALSE)
sim.batches <- logNormCounts(sim.batches)
sim.batches <- runPCA(sim.batches)
plotPCA(sim.batches, colour_by = "Batch")
rowData(sim.batches)$BatchFacBatch1
rowData(sim.batches)$BatchFacBatch2

### 7. Compare SingleCellExperiment objects
## Compare data sets with each other
sim1 <- splatSimulate(nGenes = 1000, batchCells = 20, verbose = FALSE)
sim2 <- simpleSimulate(nGenes = 1000, nCells = 20, verbose = FALSE)
comparison <- compareSCEs(list(Splat = sim1, Simple = sim2))

# Inspect the comparison result. Each accessor is its own statement: fused
# text such as `comparison$ColDatanames(...)` would try to call a non-existent
# element instead of printing ColData and then listing the plot names.
names(comparison)
comparison$RowData
comparison$ColData
names(comparison$Plots)
comparison$Plots$Means
comparison$Plots$LibrarySizes
comparison$Plots$ZerosCell

## Compare against a reference data set
sim1 <- splatSimulate(nGenes = 1000, batchCells = 100, verbose = FALSE)
sim2 <- splatSimulate(nGenes = 1000, batchCells = c(40, 60), verbose = FALSE)
sim3 <- simpleSimulate(nGenes = 1000, nCells = 100, verbose = FALSE)
# `ref` names the list element the other data sets are compared against.
difference <- diffSCEs(
  list(Splat1 = sim1, Splat2 = sim2, Simple = sim3),
  ref = "Simple"
)
difference$Plots$Means
difference$QQPlots$Means

### 8. Batch effect parameters
library("splatter")
library("scater")
library("ggplot2")

# `params` is the parameter object configured earlier in this script.

# Simulation with small batch effects
sim1 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.facLoc = 0.001,
  batch.facScale = 0.001,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Batch") + ggtitle("Small batch effects")

# Simulation with big batch effects
sim2 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.facLoc = 0.5,
  batch.facScale = 0.5,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Batch") + ggtitle("Big batch effects")

# Toggle batch-effect removal with batch.rmEffect
sim1 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.rmEffect = FALSE,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Batch") + ggtitle("With batch effects")

# The second simulation starts a new statement; appended directly after the
# ggtitle() call above it would be a syntax error.
sim2 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.rmEffect = TRUE,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Batch") + ggtitle("Batch effects removed")

### 9. Outlier parameters
## ----outlier-prob-------------------------------------------------------------
# Each histogram shows log10 gene means, filled by whether the gene's
# OutlierFactor differs from 1.

# Few outliers
sim1 <- splatSimulate(out.prob = 0.001, verbose = FALSE)
ggplot(
  as.data.frame(rowData(sim1)),
  aes(x = log10(GeneMean), fill = OutlierFactor != 1)
) +
  geom_histogram(bins = 100) +
  ggtitle("Few outliers")

# Lots of outliers
sim2 <- splatSimulate(out.prob = 0.2, verbose = FALSE)
ggplot(
  as.data.frame(rowData(sim2)),
  aes(x = log10(GeneMean), fill = OutlierFactor != 1)
) +
  geom_histogram(bins = 100) +
  ggtitle("Lots of outliers")

### 10. Group parameters
# Shared parameter object for the group simulations: 500 cells, 1000 genes.
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# One small group, one big group
sim1 <- runPCA(logNormCounts(
  splatSimulateGroups(
    params.groups,
    group.prob = c(0.9, 0.1),
    verbose = FALSE
  )
))
plotPCA(sim1, colour_by = "Group") +
  ggtitle("One small group, one big group")

# Five groups
sim2 <- runPCA(logNormCounts(
  splatSimulateGroups(
    params.groups,
    group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
    verbose = FALSE
  )
))
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Five groups")

### 11. Differential expression parameters
## de.prob parameter
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# Few DE genes
sim1 <- runPCA(logNormCounts(
  splatSimulateGroups(
    params.groups,
    group.prob = c(0.5, 0.5),
    de.prob = 0.01,
    verbose = FALSE
  )
))
plotPCA(sim1, colour_by = "Group") +
  ggtitle("Few DE genes")

# Lots of DE genes
sim2 <- runPCA(logNormCounts(
  splatSimulateGroups(
    params.groups,
    group.prob = c(0.5, 0.5),
    de.prob = 0.3,
    verbose = FALSE
  )
))
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Lots of DE genes")

## de.facLoc parameter
# Small DE factors
sim1 <- runPCA(logNormCounts(
  splatSimulateGroups(
    params.groups,
    group.prob = c(0.5, 0.5),
    de.facLoc = 0.01,
    verbose = FALSE
  )
))
plotPCA(sim1, colour_by = "Group") +
  ggtitle("Small DE factors")

# Big DE factors
sim2 <- runPCA(logNormCounts(
  splatSimulateGroups(
    params.groups,
    group.prob = c(0.5, 0.5),
    de.facLoc = 0.3,
    verbose = FALSE
  )
))
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Big DE factors")

## complex-de---------------------------------------------------------------
# `params.groups` is the 500-cell / 1000-gene parameter object created earlier
# in this script. Each DE argument below is a vector with one value per group.

# Different DE probs
sim1 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
  de.prob = c(0.01, 0.01, 0.1, 0.1, 0.3),
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
  labs(
    title = "Different DE probabilities",
    caption = paste(
      "Groups 1 and 2 have very few DE genes,",
      "Groups 3 and 4 have the default number,",
      "Group 5 has many DE genes"
    )
  )

# Different DE factors
sim2 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
  de.facLoc = c(0.01, 0.01, 0.1, 0.1, 0.2),
  de.facScale = c(0.2, 0.5, 0.2, 0.5, 0.4),
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
  labs(
    title = "Different DE factors",
    caption = paste(
      "Group 1 has factors with small location (value),",
      "and scale (variability),",
      "Group 2 has small location and greater scale.\n",
      "Groups 3 and 4 have greater location with small,",
      "and large scales",
      "Group 5 has bigger factors with moderate",
      "variability"
    )
  )

# Combination of everything
sim3 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.05, 0.2, 0.2, 0.2, 0.35),
  de.prob = c(0.3, 0.1, 0.2, 0.01, 0.1),
  de.downProb = c(0.1, 0.4, 0.9, 0.6, 0.5),
  de.facLoc = c(0.6, 0.1, 0.1, 0.01, 0.2),
  de.facScale = c(0.1, 0.4, 0.2, 0.5, 0.4),
  verbose = FALSE
)
sim3 <- logNormCounts(sim3)
sim3 <- runPCA(sim3)
# This plot gets its own title: it previously reused "Different DE factors"
# from the plot above, which made the two figures indistinguishable.
plotPCA(sim3, colour_by = "Group") +
  labs(
    title = "Combination of everything",
    caption = paste(
      "Group 1 is small with many very up-regulated DE genes,",
      "Group 2 has the default DE parameters,\n",
      "Group 3 has many down-regulated DE genes,",
      "Group 4 has very few DE genes,",
      "Group 5 is large with moderate DE factors"
    )
  )

### 12. Path parameters
# Shared parameter object for the path simulations: 500 cells, 1000 genes.
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# Linear paths
sim1 <- runPCA(logNormCounts(
  splatSimulatePaths(
    params.groups,
    group.prob = c(0.25, 0.25, 0.25, 0.25),
    de.prob = 0.5,
    de.facLoc = 0.2,
    path.from = c(0, 1, 2, 3),
    verbose = FALSE
  )
))
plotPCA(sim1, colour_by = "Group") +
  ggtitle("Linear paths")

# Branching path
sim2 <- runPCA(logNormCounts(
  splatSimulatePaths(
    params.groups,
    group.prob = c(0.25, 0.25, 0.25, 0.25),
    de.prob = 0.5,
    de.facLoc = 0.2,
    path.from = c(0, 1, 1, 3),
    verbose = FALSE
  )
))
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Branching path")

## ----paths-steps------------------------
# Few steps
sim1 <- runPCA(logNormCounts(
  splatSimulatePaths(
    params.groups,
    path.nSteps = 3,
    de.prob = 0.5,
    de.facLoc = 0.2,
    verbose = FALSE
  )
))
plotPCA(sim1, colour_by = "Step") +
  ggtitle("Few steps")

# Lots of steps
sim2 <- runPCA(logNormCounts(
  splatSimulatePaths(
    params.groups,
    path.nSteps = 1000,
    de.prob = 0.5,
    de.facLoc = 0.2,
    verbose = FALSE
  )
))
plotPCA(sim2, colour_by = "Step") +
  ggtitle("Lots of steps")

## ----paths-skew-------------------
# Skew towards the end
sim1 <- runPCA(logNormCounts(
  splatSimulatePaths(
    params.groups,
    path.skew = 0.1,
    de.prob = 0.5,
    de.facLoc = 0.2,
    verbose = FALSE
  )
))
plotPCA(sim1, colour_by = "Step") +
  ggtitle("Skewed towards the end")
参考
https://www.bioconductor.org/packages/release/bioc/html/splatter.html
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5596896/
splatter包生成单细胞RNA测序数据相关推荐
- 【R语言】Splatter,一个用于简单模拟单细胞RNA测序数据的R包
Splatter是一个用于模拟单细胞RNA测序数据的R包,本文概述并介绍Splatter的功能 一.参数功能 名称 功能 说明 可以通过splatEstimate函数估计 备注 nGenes -> ...
- 生信论文分享:通过稳健矩阵分解对单细胞rna测序数据进行插值
题目:scRMD: imputation for single cell RNA-seq data via robust matrix decomposition 出处:bioinformatics, ...
- 文献阅读 | 基于单细胞RNA测序数据的谱系追踪
Overcoming Genetic Drop-outs in Variants-based Lineage Tracing from Single-cell RNA Sequencing Data ...
- 重磅综述:三万字长文读懂单细胞RNA测序分析的最佳实践教程 (原理、代码和评述)
原文链接: https://www.embopress.org/doi/10.15252/msb.20188746 主编评语 这篇文章最好的地方不只在于推荐了工具,提供了一套分析流程,更在于详细介绍了 ...
- Mila唐建团队新作:可迁移、可解释的单细胞RNA测序模型
[栏目:前沿进展]近日,McGill大学的李岳老师和魁北克人工智能研究所Mila唐建老师团队共同提出了一种高效.易用.可拓展.可迁移.可解释的模型--scETM,用于单细胞RNA测序工作,并于Natu ...
- 一文了解单细胞RNA测序的可视化与统计分析如何更简单高效
单细胞RNA测序(scRNA-seq)的出现为探索单细胞水平的基因表达谱提供了前所未有的机会.目前,scRNA-seq已成为研究细胞异质性的关键生物学问题(尤其是在肿瘤学和免疫学研究中)的首选.然而, ...
- Nature | 基于单细胞RNA测序绘制人类肺组织分子细胞图谱,成功鉴定多种未知细胞类型...
单细胞RNA-seq技术已经在绘制器官基因表达谱研究中发挥了关键作用,但目前很难系统地鉴定和定位单个器官中所有分子细胞的类型,并创建完整的分子细胞图谱.近期发表的研究成果中,多个细胞类型特异性标记分子 ...
- 【佳学基因人工智能】RNA测序数据的信息分析——基因解码信息源的准备
[佳学基因人工智能]RNA测序数据的信息分析--基因解码信息源的准备 人的基因信息解码策略 人的基因信息解码有两种策略,一是数据库比对策略,二是基因解码策略.数据库比对策略只能用数据库中记录过的案例. ...
- 单细胞RNA测序技术之入门指南
单细胞RNA测序技术之入门指南 [字体: 大 中 小 ] 时间:2018年09月12日 来源:生物通 编辑推荐: 在这个飞速发展的测序时代,DNA和RNA测序已经逐渐成为"实验室中的家常菜& ...
最新文章
- python Tkinter学习笔记 menu控件 02
- CSS 解决td里面内容太多把表格弄变形的原因,设置 自动换行。
- DHCP中继代理;DHCP突破vlan限制
- html5基础知识点表单
- css3 图片放大缩小闪烁效果
- linux 常用命令03--修改文件的权限与归属
- spring elasticsearch 按条件删除_实战:项目数据源转为Elasticsearch
- kali安装pip3
- 漫步最优化四十五——矩阵S的生成
- 2020年“双11”各家晒出成绩单,你还没付完尾款,有的人已经收货了!
- 支持医学研究的Apple开源移动框架
- c++ string字符串翻转
- 互不相识的人在什么情况下会给你点赞呢?
- 【企业架构】什么是 Zachman 框架? 用于管理企业架构的矩阵
- Kali Linux 基于Easy File Sharing Web Server 6.9 编写漏洞渗透模块 (上)
- 云诺网盘为什么关停了好用的企业网盘有哪些
- 信号与系统作业之我的朋友把我的大作业分享了好朋友
- 大数据学习计划【2019经典不断更新】
- azkaban任务一直处于preparing,解决办法
- json学习笔记(圣思园视频学习笔记)
热门文章
- 实体-关系联合抽取:Incremental Joint Extraction of Entity Mentions and Relations
- linux 下安装apache 快速教程
- Picture2Epub
- 系统集成项目管理工程师05《项目进度管理》
- 年终盘点!2022顶会论文代码大合集!
- pythonxpath判断元素是否存在_Python Lxml(objectify):检查标签是否存在
- pycharm解决光标变粗,关闭改写模式
- 如何创建低成本沙箱环境?推荐你使用API仿真!
- 一个C#写的爬虫程序
- 散列函数(哈希函数,Hash Function)