getwd()
path="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin" #空间转录组
dir.create(path)
setwd(path)
getwd()
list.files()raw_counts=read.csv("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv")
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-1]
head(counts)[1:4,1:4]
rownames(counts)=counts$symbolhead(raw_counts)[1:4,1:4]
counts=raw_counts[,-2]
head(counts)[1:4,1:4]
rownames(counts)=counts$id
counts=counts[,-1]library(Seurat)
#https://zhuanlan.zhihu.com/p/385206713
rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")ids=raw_counts[,1:2]
head(ids)
colnames(ids)= c('ENSEMBL','SYMBOL')
head(ids)
dim(ids) # [1] 16428
ids=na.omit(ids)
dim(ids) # [1] 15504
length(unique(ids$SYMBOL)) # [1] 15494
# 这里的关系超级乱,互相之间都不是一对一
# 凡是混乱的ID一律删除即可
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
dim(ids)
pos=match(ids$ENSEMBL,rownames(rawdata) )
hp_sce=rawdata[pos,]
hp_sce
#rownames(hp_sce) = ids$SYMBOL
# RenameGenesSeurat  -----------------------------------------------
#创建函数 改名字
RenameGenesSeurat <- function(obj , newnames ) { # Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration. # It only changes obj@assays$RNA@counts, @data and @scale.data.print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")RNA <- obj@assays$RNAif (nrow(RNA) == length(newnames)) {if (length(RNA@counts)) RNA@counts@Dimnames[[1]]            <- newnamesif (length(RNA@data)) RNA@data@Dimnames[[1]]                <- newnamesif (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]]    <- newnames} else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}obj@assays$RNA <- RNAreturn(obj)
}hp_sce=RenameGenesSeurat(obj = hp_sce, newnames = ids$SYMBOL)
getwd()
#save(hp_sce,file = 'first_sce.Rdata')hp_sce
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")getwd()plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
hp_scehp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
hp_sce1sce=hp_sce1
sce
colnames(sce)
grep(colnames(sce),pattern = ".1")
grep(colnames(sce),pattern = ".2")
sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))), rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;table(sce$stim)library(dplyr)
sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)pdf("2_ElbowPlot.pdf")
ElbowPlot(All, ndims = 50)
dev.off()library(cowplot)
#All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
pdf("2_pre_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
plot_grid(p1, p2)
dev.off()
##########################run harmonyAll <- All %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All, 'harmony') pdf("2_after_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
plot_grid(p3, p4)
dev.off()
#############cluster
#library(harmony)
All <- All %>% RunUMAP(reduction = "harmony", dims = 1:30) %>% RunTSNE(reduction = "harmony", dims = 1:30) %>% FindNeighbors(reduction = "harmony", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()options(repr.plot.height = 4, repr.plot.width = 10)
pdf("3_after_harmony_umap_two_group.pdf")
DimPlot(All, reduction = "umap", group.by = "stim", pt.size = .1)
dev.off()pdf("3_after_harmony_cluster_UMAP.pdf")
DimPlot(All, reduction = "umap", label = TRUE, pt.size = .1)
dev.off()pdf("3_umap_samples_split.pdf")
DimPlot(All, reduction = "umap", pt.size = .1, split.by = "stim", label = T)
dev.off()pdf("3_after_harmony_tsne_two_group.pdf")
DimPlot(All, reduction = "tsne", group.by = "stim", pt.size = .1)
dev.off()pdf("3_after_harmony_cluster_tSNE.pdf")
DimPlot(All, reduction = "tsne", label = TRUE, pt.size = .1)
dev.off()pdf("3_tSNE_samples_split.pdf")
DimPlot(All, reduction = "tsne", pt.size = .1, split.by = "stim", label = T)
dev.off()
getwd()
#save(All,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
load("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")

geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat相关推荐

  1. GEO学习笔记-P3 表达矩阵过滤

    学习材料:[生信技能树]公共数据库挖掘实例(基于R语言) bilibili版本以及后续更新课程中的github材料为基础. 本章节是以:[生信技能树]公共数据库挖掘实例(基于R语言)为基础,进行的代码 ...

  2. seurat提取表达矩阵_如何改造Seurat包的DoHeatmap函数?

    刘小泽写于19.12.4 分析过单细胞数据的小伙伴应该都使用过Seurat包,其中有个函数叫DoHeatmap,具体操作可以看: 单细胞转录组学习笔记-17-用Seurat包分析文章数据 前言 走完S ...

  3. geo读取表达矩阵 RNA-seq R语言部分(表达矩阵合并及id转换)

    geo读取表达矩阵 RNA-seq R语言 方法一:1.从geo页面直接下载表达矩阵,然后通过r读取表达矩阵 2.利用getgeo函数读取表达矩阵 3.利用geo自带的geo2r,调整p值为1,获取探 ...

  4. python和R写出表达矩阵为稀疏矩阵matrix.mtx.gz的方法

    ###python部分 加载读取稀疏矩阵的mmread和构建数据框的pandas from scipy.io import mmread import pandas as pd import nump ...

  5. c++ 合并2个txt_多个表达矩阵文件合并

    前些天群主给了我们学徒一个任务,下载数据集:GSE84073 做一些批量分析! 群主想看到,HCC,CHC,CC这3组,跟healthy的分开比较,然后3个火山图,3个热图. 那么首先需要下载coun ...

  6. seurat提取表达矩阵_GPL17586、GPL19251和GPL16686平台芯片ID转换

    芯片分析中经常会遇到Affymetrix Human Transcriptome Array 2.0芯片,由于目前还没有现成的R包可以用,因此分析方法也不统一.见生信技能树Jimmy老师HTA2.0芯 ...

  7. seurat提取表达矩阵_Hemberg-lab单细胞转录组数据分析

    单细胞RNA-seq简介 混合RNA-seq2000年末的重大技术突破,取代微阵列表达芯片被广泛使用 通过混合大量细胞获取足够RNA用于建库测序,来定量每个基因的平均表达水平 用于比较转录组,例如比较 ...

  8. 单细胞分析实录(2): 使用Cell Ranger得到表达矩阵

    Cell Ranger是一个"傻瓜"软件,你只需提供原始的fastq文件,它就会返回feature-barcode表达矩阵.为啥不说是gene-cell,举个例子,cell has ...

  9. seurat提取表达矩阵_什么,你一定要基于FPKM标准化表达矩阵做单细胞差异分析...

    学徒和学员已经陆续出师,是时候把生信技能树的舞台交给后辈了!下面是<生信入门第7期>学员的分享最近看到有一个简单的数据挖掘文章,就做了一个单细胞表达矩阵的差异分析,然后强行解释一波.而且使 ...

最新文章

  1. other-如何可以查看别人请求的输出结果
  2. 恒驰机器人_恒大汽车基地:2545台机器人为恒驰“效力”
  3. vscode的 jsonp 配置文件
  4. 字符串转换为整数的源码atoi()
  5. 【Python3_进阶系列_010】Python3-生成器
  6. 【Java】函数使用
  7. 如何用tomcat发布自己的Java项目
  8. JSON应用场景与实战
  9. J-Link V9固件修复
  10. sql2000 mysql_sql2000迷你版 超精简版SQL Server 2000数据库下载
  11. 第十七周助教工作总结——NWNU李泓毅
  12. java 取结果集的最后三项,
  13. 论文笔记:主干网络——SENet
  14. 您的计算机无法启动磁盘损坏,解决办法:如何修复SATA硬盘损坏并无法启动?...
  15. python发音小程序
  16. 表单上下间隔怎么设置php,html中怎么设置每行文字的间隔
  17. GemFire 异步写和同步读
  18. 问题 C: Fraction 分数类 I
  19. Unity开发win10软件系列问题6: unity调用 win10 虚拟键盘tabtip.exe
  20. js爬取:bili播放列表,右下角建立红底白字下载按钮,保存为csv格式到本地

热门文章

  1. 北京最新建筑八大员(电气)考试题库及答案
  2. (四)Gluster 配置
  3. IPTV系统整体架构
  4. 和AI一起玩儿剧本杀:居然比我还入戏
  5. 计算机软件测试与开发编程实习报告30篇(附周记日志)
  6. JS中定时器和延时调用学习笔记
  7. iPad 3g版完美实现打电话功能(phoneitipad破解)
  8. 订书机在工作中是否重要
  9. pdf怎么转换成图片?学会这几种方法,快速转换
  10. 关于Excel表操作-通过gensim实现模糊匹配