Splatter 是一个用于模拟单细胞 RNA 测序(scRNA-seq)计数数据的 R 软件包。它提供了一个简单的接口,用于创建可重复且文档完善的复杂模拟;既可以从真实数据中估计模拟参数,也提供了用于比较真实数据集与模拟数据集的函数。

# Install splatter from Bioconductor if it is not available yet:
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("splatter")

# Attach the packages used throughout this walkthrough. library(splatter)
# must be a statement of its own: when it sits at the end of the commented
# install line it is never executed and the splat* functions are not found.
library(splatter)
library(scater)

### 1. Quickly create mock data
# Usage:
#   mockSCE(ncells = 200, ngenes = 2000, nspikes = 100)
set.seed(1) # fix the RNG seed before generating the mock data
sce <- mockSCE() # class: SingleCellExperiment
counts(sce)

### 2. Parameter estimation
# Estimate simulation parameters from a data set (mock data here; real
# single-cell sequencing data can be used the same way).
params <- splatEstimate(sce) # Params object

### 3. Customise parameters and create simulated data
# Create a parameter object (defaults noted by the original author:
# 10000 genes, 100 cells).
params <- newSplatParams()

# Inspect a single parameter ...
getParam(params, "nGenes")
getParam(params, "nCells")
# ... or several parameters at once.
getParams(params, c("nGenes", "mean.rate", "mean.shape"))

# Change parameters one at a time.
params <- setParam(params, "nGenes", 5000)
# Splat parameter objects refuse direct assignment to nCells ("nCells cannot
# be set directly, set batchCells instead"), so the number of cells is
# configured through batchCells.
params <- setParam(params, "batchCells", 50)

# Change several parameters in one call, either as named arguments ...
params <- setParams(params, mean.shape = 0.5, de.prob = 0.2)
# ... or through a list passed as `update`.
params <- setParams(params, update = list(nGenes = 8000, mean.rate = 0.5))

# Create the simulated data set from the parameter object.
sim <- splatSimulate(params)

### 4. Extract information from the SingleCellExperiment object
class(sim)
# Information about genes
head(rowData(sim))
# Information about cells
head(colData(sim))
# Gene by cell matrices
names(assays(sim))
# Example of cell means matrix
assays(sim)$CellMeans[1:5, 1:5]

# Expression (count) matrix. Each inspection call is its own statement;
# fusing `counts[1:3, 1:5]` and `class(counts)` on one line is a syntax error.
counts <- counts(sim)
counts[1:3, 1:5]
class(counts) # "matrix" "array"
typeof(counts) # "integer"
dim(counts) # number of genes, number of cells

### 5. Normalisation, log-transformation and dimensionality reduction
## logNormCounts() is a {scuttle} function;
## runPCA() and plotPCA() are {scater} functions.

# Compute log-transformed normalised expression values from the counts of
# the SingleCellExperiment object.
sim <- logNormCounts(sim)
counts(sim)[1:3, 1:5]
logcounts(sim)[1:3, 1:5]

# PCA dimensionality reduction
sim <- runPCA(sim)
plotPCA(sim)

### 6. Create more complex simulations
## groups
# Simulate two groups of cells with 30% / 70% membership probability.
sim.groups <- splatSimulate(
  group.prob = c(0.3, 0.7),
  method = "groups",
  verbose = FALSE
)
sim.groups <- logNormCounts(sim.groups)
sim.groups <- runPCA(sim.groups)
plotPCA(sim.groups, colour_by = "Group")

# Inspect the simulated object. dim() has to be a separate statement from
# the plotPCA() call above; fused on one line the two do not parse.
dim(counts(sim.groups))
rowData(sim.groups)$DEFacGroup1
rowData(sim.groups)$DEFacGroup2
metadata(sim.groups)

## paths
# Simulate cells along a path and colour the PCA by the step of each cell.
sim.paths <- splatSimulate(
  de.prob = 0.2,
  nGenes = 1000,
  method = "paths",
  verbose = FALSE
)
sim.paths <- logNormCounts(sim.paths)
sim.paths <- runPCA(sim.paths)
plotPCA(sim.paths, colour_by = "Step")
colData(sim.paths)$Step

## batches
# Simulate two batches of 50 cells each and colour the PCA by batch.
sim.batches <- splatSimulate(
  batchCells = c(50, 50),
  verbose = FALSE
)
sim.batches <- logNormCounts(sim.batches)
sim.batches <- runPCA(sim.batches)
plotPCA(sim.batches, colour_by = "Batch")
rowData(sim.batches)$BatchFacBatch1
rowData(sim.batches)$BatchFacBatch2

### 7. Compare SingleCellExperiment objects
## Compare data sets with each other
sim1 <- splatSimulate(nGenes = 1000, batchCells = 20, verbose = FALSE)
sim2 <- simpleSimulate(nGenes = 1000, nCells = 20, verbose = FALSE)
comparison <- compareSCEs(list(Splat = sim1, Simple = sim2))

# Top-level elements of the comparison result. Each inspection call must be
# its own statement: fused forms such as `compareSCEs(...)names(comparison)`
# or `comparison$ColDatanames(...)` do not parse / do not refer to ColData.
names(comparison)
comparison$RowData
comparison$ColData

# Available comparison plots
names(comparison$Plots)
comparison$Plots$Means
comparison$Plots$LibrarySizes
comparison$Plots$ZerosCell

## Compare against a reference
# Three simulations; the one named "Simple" is passed as the reference.
sim1 <- splatSimulate(nGenes = 1000, batchCells = 100, verbose = FALSE)
sim2 <- splatSimulate(
  nGenes = 1000,
  batchCells = c(40, 60),
  verbose = FALSE
)
sim3 <- simpleSimulate(nGenes = 1000, nCells = 100, verbose = FALSE)
difference <- diffSCEs(
  list(Splat1 = sim1, Splat2 = sim2, Simple = sim3),
  ref = "Simple"
)
difference$Plots$Means
difference$QQPlots$Means

### 8. Batch effect parameters
library("splatter")
library("scater")
library("ggplot2")

# Simulation with small batch effects
sim1 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.facLoc = 0.001,
  batch.facScale = 0.001,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Batch") + ggtitle("Small batch effects")

# Simulation with big batch effects
sim2 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.facLoc = 0.5,
  batch.facScale = 0.5,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Batch") + ggtitle("Big batch effects")

# Toggle whether batch effects are removed (batch.rmEffect)
sim1 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.rmEffect = FALSE,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Batch") + ggtitle("With batch effects")

# The second simulation starts on its own line; fused onto the ggtitle()
# call above it is a syntax error.
sim2 <- splatSimulate(
  params,
  batchCells = c(100, 100),
  batch.rmEffect = TRUE,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Batch") + ggtitle("Batch effects removed")

### 9. Outlier parameters
## ----outlier-prob-------------------------------------------------------------
# Few outliers
sim1 <- splatSimulate(out.prob = 0.001, verbose = FALSE)
# Histogram of log10 gene means, filled by whether the outlier factor
# differs from 1.
ggplot(
  as.data.frame(rowData(sim1)),
  aes(x = log10(GeneMean), fill = OutlierFactor != 1)
) +
  geom_histogram(bins = 100) +
  ggtitle("Few outliers")

# Lots of outliers
sim2 <- splatSimulate(out.prob = 0.2, verbose = FALSE)
ggplot(
  as.data.frame(rowData(sim2)),
  aes(x = log10(GeneMean), fill = OutlierFactor != 1)
) +
  geom_histogram(bins = 100) +
  ggtitle("Lots of outliers")

### 10. Group parameters
# Shared parameter object for the group simulations: 500 cells, 1000 genes.
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# One small group, one big group
sim1 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.9, 0.1),
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
  ggtitle("One small group, one big group")

# Five groups
sim2 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Five groups")

### 11. Differential expression parameters
## de.prob parameter
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# Few DE genes
sim1 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.5, 0.5),
  de.prob = 0.01,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
  ggtitle("Few DE genes")

# Lots of DE genes
sim2 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.5, 0.5),
  de.prob = 0.3,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Lots of DE genes")

## de.facLoc parameter
# Small DE factors
sim1 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.5, 0.5),
  de.facLoc = 0.01,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
  ggtitle("Small DE factors")

# Big DE factors
sim2 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.5, 0.5),
  de.facLoc = 0.3,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Big DE factors")

## complex-de---------------------------------------------------------------
# Different DE probs (one value per group)
sim1 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
  de.prob = c(0.01, 0.01, 0.1, 0.1, 0.3),
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
  labs(
    title = "Different DE probabilities",
    caption = paste(
      "Groups 1 and 2 have very few DE genes,",
      "Groups 3 and 4 have the default number,",
      "Group 5 has many DE genes"
    )
  )

# Different DE factors (one location / scale value per group)
sim2 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
  de.facLoc = c(0.01, 0.01, 0.1, 0.1, 0.2),
  de.facScale = c(0.2, 0.5, 0.2, 0.5, 0.4),
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
  labs(
    title = "Different DE factors",
    caption = paste(
      "Group 1 has factors with small location (value),",
      "and scale (variability),",
      "Group 2 has small location and greater scale.\n",
      "Groups 3 and 4 have greater location with small,",
      "and large scales",
      "Group 5 has bigger factors with moderate",
      "variability"
    )
  )

# Combination of everything
sim3 <- splatSimulateGroups(
  params.groups,
  group.prob = c(0.05, 0.2, 0.2, 0.2, 0.35),
  de.prob = c(0.3, 0.1, 0.2, 0.01, 0.1),
  de.downProb = c(0.1, 0.4, 0.9, 0.6, 0.5),
  de.facLoc = c(0.6, 0.1, 0.1, 0.01, 0.2),
  de.facScale = c(0.1, 0.4, 0.2, 0.5, 0.4),
  verbose = FALSE
)
sim3 <- logNormCounts(sim3)
sim3 <- runPCA(sim3)
plotPCA(sim3, colour_by = "Group") +
  labs(
    title = "Different DE factors",
    caption = paste(
      "Group 1 is small with many very up-regulated DE genes,",
      "Group 2 has the default DE parameters,\n",
      "Group 3 has many down-regulated DE genes,",
      "Group 4 has very few DE genes,",
      "Group 5 is large with moderate DE factors"
    )
  )

### 12. Path parameters
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# Linear paths: path.from = c(0, 1, 2, 3)
sim1 <- splatSimulatePaths(
  params.groups,
  group.prob = c(0.25, 0.25, 0.25, 0.25),
  de.prob = 0.5,
  de.facLoc = 0.2,
  path.from = c(0, 1, 2, 3),
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
  ggtitle("Linear paths")

# Branching path: path.from = c(0, 1, 1, 3)
sim2 <- splatSimulatePaths(
  params.groups,
  group.prob = c(0.25, 0.25, 0.25, 0.25),
  de.prob = 0.5,
  de.facLoc = 0.2,
  path.from = c(0, 1, 1, 3),
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
  ggtitle("Branching path")

## ----paths-steps------------------------
# Few steps
sim1 <- splatSimulatePaths(
  params.groups,
  path.nSteps = 3,
  de.prob = 0.5,
  de.facLoc = 0.2,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Step") +
  ggtitle("Few steps")

# Lots of steps
sim2 <- splatSimulatePaths(
  params.groups,
  path.nSteps = 1000,
  de.prob = 0.5,
  de.facLoc = 0.2,
  verbose = FALSE
)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Step") +
  ggtitle("Lots of steps")

## ----paths-skew-------------------
# Skew towards the end (path.skew = 0.1)
sim1 <- splatSimulatePaths(
  params.groups,
  path.skew = 0.1,
  de.prob = 0.5,
  de.facLoc = 0.2,
  verbose = FALSE
)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Step") +
  ggtitle("Skewed towards the end")

参考

https://www.bioconductor.org/packages/release/bioc/html/splatter.html

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5596896/

splatter包生成单细胞RNA测序数据相关推荐

  1. 【R语言】Splatter,一个用于简单模拟单细胞RNA测序数据的R包

    Splatter是一个用于模拟单细胞RNA测序数据的R包,本文概述并介绍Splatter的功能 一.参数功能 名称 功能 说明 可以通过splatEstimate函数估计 备注 nGenes -> ...

  2. 生信论文分享:通过稳健矩阵分解对单细胞rna测序数据进行插值

    题目:scRMD: imputation for single cell RNA-seq data via robust matrix decomposition 出处:bioinformatics, ...

  3. 文献阅读 | 基于单细胞RNA测序数据的谱系追踪

    Overcoming Genetic Drop-outs in Variants-based Lineage Tracing from Single-cell RNA Sequencing Data ...

  4. 重磅综述:三万字长文读懂单细胞RNA测序分析的最佳实践教程 (原理、代码和评述)

    原文链接: https://www.embopress.org/doi/10.15252/msb.20188746 主编评语 这篇文章最好的地方不只在于推荐了工具,提供了一套分析流程,更在于详细介绍了 ...

  5. Mila唐建团队新作:可迁移、可解释的单细胞RNA测序模型

    [栏目:前沿进展]近日,McGill大学的李岳老师和魁北克人工智能研究所Mila唐建老师团队共同提出了一种高效.易用.可拓展.可迁移.可解释的模型--scETM,用于单细胞RNA测序工作,并于Natu ...

  6. 一文了解单细胞RNA测序的可视化与统计分析如何更简单高效

    单细胞RNA测序(scRNA-seq)的出现为探索单细胞水平的基因表达谱提供了前所未有的机会.目前,scRNA-seq已成为研究细胞异质性的关键生物学问题(尤其是在肿瘤学和免疫学研究中)的首选.然而, ...

  7. Nature | 基于单细胞RNA测序绘制人类肺组织分子细胞图谱,成功鉴定多种未知细胞类型...

    单细胞RNA-seq技术已经在绘制器官基因表达谱研究中发挥了关键作用,但目前很难系统地鉴定和定位单个器官中所有分子细胞的类型,并创建完整的分子细胞图谱.近期发表的研究成果中,多个细胞类型特异性标记分子 ...

  8. 【佳学基因人工智能】RNA测序数据的信息分析——基因解码信息源的准备

    [佳学基因人工智能]RNA测序数据的信息分析--基因解码信息源的准备 人的基因信息解码策略 人的基因信息解码有两种策略,一是数据库比对策略,二是基因解码策略.数据库比对策略只能用数据库中记录过的案例. ...

  9. 单细胞RNA测序技术之入门指南

    单细胞RNA测序技术之入门指南 [字体: 大 中 小 ] 时间:2018年09月12日 来源:生物通 编辑推荐: 在这个飞速发展的测序时代,DNA和RNA测序已经逐渐成为"实验室中的家常菜" ...

最新文章

  1. python Tkinter学习笔记 menu控件 02
  2. CSS 解决td里面内容太多把表格弄变形的原因,设置 自动换行。
  3. DHCP中继代理;DHCP突破vlan限制
  4. html5基础知识点表单
  5. css3 图片放大缩小闪烁效果
  6. linux 常用命令03--修改文件的权限与归属
  7. spring elasticsearch 按条件删除_实战:项目数据源转为Elasticsearch
  8. kali安装pip3
  9. 漫步最优化四十五——矩阵S的生成
  10. 2020年“双11”各家晒出成绩单,你还没付完尾款,有的人已经收货了!
  11. 支持医学研究的Apple开源移动框架
  12. c++ string字符串翻转
  13. 互不相识的人在什么情况下会给你点赞呢?
  14. 【企业架构】什么是 Zachman 框架? 用于管理企业架构的矩阵
  15. Kali Linux 基于Easy File Sharing Web Server 6.9 编写漏洞渗透模块 (上)
  16. 云诺网盘为什么关停了好用的企业网盘有哪些
  17. 信号与系统作业之我的朋友把我的大作业分享了好朋友
  18. 大数据学习计划【2019经典不断更新】
  19. azkaban任务一直处于preparing,解决办法
  20. json学习笔记(圣思园视频学习笔记)

热门文章

  1. 实体-关系联合抽取:Incremental Joint Extraction of Entity Mentions and Relations
  2. linux 下安装apache 快速教程
  3. Picture2Epub
  4. 系统集成项目管理工程师05《项目进度管理》
  5. 年终盘点!2022顶会论文代码大合集!
  6. pythonxpath判断元素是否存在_Python Lxml(objectify):检查标签是否存在
  7. pycharm解决光标变粗,关闭改写模式
  8. 如何创建低成本沙箱环境?推荐你使用API仿真!
  9. 一个C#写的爬虫程序
  10. 散列函数(哈希函数,Hash Function)