直接放代码

R代码

# Build per-annotation abundance tables for the DNA gene profile.
#
# Steps:
#   1. Read the gene abundance table and keep GeneID plus the selected samples.
#   2. Read the eggNOG-mapper annotation table and keep the annotation columns.
#   3. Merge both on the gene id, keeping every gene of the abundance table.
#   4. Melt the merged table to long format and write it to melt.csv.
#   5. For each annotation column, sum abundances per annotation value and
#      sample, and write one "<column> abund.csv" file.
gc()
library('magrittr')
setwd("~/Documents/48sample/mag/")

# Columns to keep from the abundance table. Sample names carry the "X" prefix
# because data.frame() makes purely numeric column names syntactic.
sample_cols <- c(
  'GeneID', 'X127', 'X22', 'X129', 'X48', 'X138', 'X47', 'X140', 'X20',
  'X132', 'X18', 'X149', 'X73', 'X120', 'X105', 'X148', 'X44', 'X122', 'X30',
  'X124', 'X26', 'X143', 'X34', 'X133', 'X12', 'X145', 'X21', 'X152', 'X10',
  'X118', 'X102', 'X147', 'X80', 'X121', 'X1', 'X151', 'X35', 'X135', 'X38',
  'X136', 'X2', 'X150', 'X17', 'X146', 'X55', 'X141', 'X11', 'X137', 'X97'
)

# Annotation columns that are carried through the merge and pivoted on later.
annotation_cols <- c(
  "Predicted_protein_name", "Gene_Ontology_terms_", "EC_number", "KEGG_ko",
  "KEGG_Pathway", "KEGG_Module", "KEGG_Reaction", "KEGG_rclass", "BRITE",
  "KEGG_TC", "CAZy_", "BiGG_Reaction", "COG_Functional_Category"
)

# Load the abundance table
profile <- data.table::fread(
  "DNA.geneabundance.txt(1)",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
) %>%
  data.frame()
profile <- profile[, colnames(profile) %in% sample_cols, drop = FALSE]

# Load the annotation table (read without a header row; names assigned below)
ko <- data.table::fread(
  "~/Documents/48sample/mag/annotation.emapper.annotations",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE, fill = TRUE
) %>%
  data.frame()
colnames(ko) <- c(
  "query_name", "seed_eggNOG_ortholog", "seed_ortholog_evalue",
  "seed_ortholog_score", "Predicted_taxonomic_group",
  "Predicted_protein_name", "Gene_Ontology_terms_", "EC_number", "KEGG_ko",
  "KEGG_Pathway", "KEGG_Module", "KEGG_Reaction", "KEGG_rclass", "BRITE",
  "KEGG_TC", "CAZy_", "BiGG_Reaction",
  "tax_scope:_eggNOG_taxonomic_level_used_for_annotation", "eggNOG_OGs_",
  "bestOG_(deprecated,_use_smallest_from_eggnog_OGs)",
  "COG_Functional_Category", "eggNOG_free_text_description"
)
ko <- ko[, colnames(ko) %in% c("query_name", annotation_cols), drop = FALSE]

# Strip a trailing "_1" from the annotation ids before matching on GeneID.
ko[, 1] <- gsub("_1$", "", ko[, 1])

# all.y = TRUE keeps genes that have abundance but no annotation row.
merged <- merge(ko, profile, by.x = "query_name", by.y = "GeneID", all.y = TRUE)

setwd("~/Documents/48sample/mag/DNAgene")
rm(ko, profile, sample_cols)
gc()

# Long format: one row per gene and sample, with columns `variable` (sample)
# and `value` (abundance).
merged <- reshape2::melt(merged, id.vars = c("query_name", annotation_cols))
write.table(merged, file = "melt.csv", sep = ",")

# One pivot per annotation column. The formula has to be built from the
# column name; writing `annotation_cols[i] ~ variable` directly would not
# refer to the column. Iterating over the vector itself visits every column.
for (annotation in annotation_cols) {
  cast_formula <- stats::as.formula(paste(annotation, "~ variable"))
  reshape2::acast(
    merged, cast_formula,
    fun.aggregate = sum, value.var = "value"
  ) %>%
    t() %>%
    data.frame() %>%
    write.csv(file = paste(annotation, "abund.csv"))
  print(annotation)
}

pandas代码

import pandas as pd
import numpy as np
import os
# Build per-annotation abundance tables for the DNA gene profile with pandas:
# read the abundance and annotation tables, merge them on the gene id, melt
# the result to long format and write one summed pivot table per annotation
# column.
os.chdir("/mnt/10t/mzy/06profile/mag")

# Annotation columns carried through the merge and pivoted on at the end.
annotation_columns = [
    "Predicted_protein_name", "Gene_Ontology_terms_", "EC_number", "KEGG_ko",
    "KEGG_Pathway", "KEGG_Module", "KEGG_Reaction", "KEGG_rclass", "BRITE",
    "KEGG_TC", "CAZy_", "BiGG_Reaction", "COG_Functional_Category",
]

# Abundance table: the first column (gene id) is read as the index, then only
# the selected sample columns are kept.
profile = pd.read_csv("DNA.geneabundance.txt(1)", sep="\t", header=0, index_col=0)
profile = profile[['127', '22', '129', '48', '138', '47', '140', '20', '132', '18', '149', '73', '120', '105', '148', '44', '122', '30', '124', '26', '143', '34', '133', '12', '145', '21', '152', '10', '118', '102', '147', '80', '121', '1', '151', '35', '135', '38', '136', '2', '150', '17', '146', '55', '141', '11', '137', '97']]
# Turn the gene-id index into a plain column named like the annotation key, so
# the merge uses a single shared column and no extra id column is melted later.
profile = profile.rename_axis("query_name").reset_index()

# Annotation table: read without a header row (column names are assigned
# below), so the first record is not consumed as a header.
ko = pd.read_csv("annotation.emapper.annotations", sep="\t", header=None)
ko.columns = ["query_name", "seed_eggNOG_ortholog", "seed_ortholog_evalue", "seed_ortholog_score", "Predicted_taxonomic_group", "Predicted_protein_name", "Gene_Ontology_terms_", "EC_number", "KEGG_ko", "KEGG_Pathway", "KEGG_Module", "KEGG_Reaction", "KEGG_rclass", "BRITE", "KEGG_TC", "CAZy_", "BiGG_Reaction", "tax_scope:_eggNOG_taxonomic_level_used_for_annotation", "eggNOG_OGs_", "bestOG_(deprecated,_use_smallest_from_eggnog_OGs)", "COG_Functional_Category", "eggNOG_free_text_description"]
# .copy() makes the subset an independent frame so the assignment below is
# applied reliably.
ko = ko[["query_name"] + annotation_columns].copy()
# Strip a trailing "_1" from the annotation ids; assigning the result back
# replaces the chained inplace call, which may act on a temporary copy.
ko["query_name"] = ko["query_name"].replace("_1$", "", regex=True)

# Right join keeps every gene of the abundance table, annotated or not.
merged = pd.merge(ko, profile, on="query_name", how="right")
print(merged.columns.values)
merged.info()
# A rename of the sample columns to "X"-prefixed names was tried at this point
# and left disabled (original note: numeric column names cause problems).

# Long format: one row per gene and sample, with columns `variable` (sample)
# and `value` (abundance).
melted = merged.melt(id_vars=["query_name"] + annotation_columns)

# Test snippet kept from the original:
# pd.pivot_table(melted, index=[u'Predicted_protein_name'], columns=[u'variable'], aggfunc=[sum]).to_csv('test.csv')

# Main loop: one CSV per annotation column. values="value" restricts the sum
# to the abundance column instead of every remaining (string) column.
for column in annotation_columns:
    table = pd.pivot_table(
        melted, index=column, columns="variable", values="value", aggfunc="sum"
    )
    table.to_csv("/mnt/10t/mzy/06profile/mag/DNAgene/" + column + ".csv")

处理RNA数据的pandas代码

import pandas as pd
import numpy as np
import os
# Build per-annotation abundance tables for the RNA gene profile with pandas:
# read the abundance and annotation tables, merge them on the gene id, melt
# the result to long format and write one summed pivot table per annotation
# column.
os.chdir("/mnt/10t/mzy/06profile/mag")

# Annotation columns carried through the merge and pivoted on at the end.
annotation_columns = [
    "Predicted_protein_name", "Gene_Ontology_terms_", "EC_number", "KEGG_ko",
    "KEGG_Pathway", "KEGG_Module", "KEGG_Reaction", "KEGG_rclass", "BRITE",
    "KEGG_TC", "CAZy_", "BiGG_Reaction", "COG_Functional_Category",
]

# Abundance table: the first column (gene id) is read as the index, then only
# the selected sample columns are kept.
profile = pd.read_csv("RNA.geneabundance.txt", sep="\t", header=0, index_col=0)
profile = profile[["127","22","129","81","138","99","140","20","132","82","149","73","120","105","148","44","122","101","124","100","143","76","133","12","145","79","152","77","118","102","147","80","121","106","151","104","135","38","136","2","150","78","146","55","141","11","137","97"]]
# Turn the gene-id index into a plain column named like the annotation key, so
# the merge uses a single shared column and no extra id column is melted later.
profile = profile.rename_axis("query_name").reset_index()

# Annotation table: read without a header row (column names are assigned
# below), so the first record is not consumed as a header.
ko = pd.read_csv("annotation.emapper.annotations", sep="\t", header=None)
ko.columns = ["query_name", "seed_eggNOG_ortholog", "seed_ortholog_evalue", "seed_ortholog_score", "Predicted_taxonomic_group", "Predicted_protein_name", "Gene_Ontology_terms_", "EC_number", "KEGG_ko", "KEGG_Pathway", "KEGG_Module", "KEGG_Reaction", "KEGG_rclass", "BRITE", "KEGG_TC", "CAZy_", "BiGG_Reaction", "tax_scope:_eggNOG_taxonomic_level_used_for_annotation", "eggNOG_OGs_", "bestOG_(deprecated,_use_smallest_from_eggnog_OGs)", "COG_Functional_Category", "eggNOG_free_text_description"]
# .copy() makes the subset an independent frame so the assignment below is
# applied reliably.
ko = ko[["query_name"] + annotation_columns].copy()
# Strip a trailing "_1" from the annotation ids; assigning the result back
# replaces the chained inplace call, which may act on a temporary copy.
ko["query_name"] = ko["query_name"].replace("_1$", "", regex=True)

# Right join keeps every gene of the abundance table, annotated or not.
merged = pd.merge(ko, profile, on="query_name", how="right")
print(merged.columns.values)

# Long format: one row per gene and sample, with columns `variable` (sample)
# and `value` (abundance).
melted = merged.melt(id_vars=["query_name"] + annotation_columns)

# Test snippet kept from the original:
# pd.pivot_table(melted, index=[u'Predicted_protein_name'], columns=[u'variable'], aggfunc=[sum]).to_csv('test.csv')

# Main loop: one CSV per annotation column. values="value" restricts the sum
# to the abundance column instead of every remaining (string) column.
for column in annotation_columns:
    table = pd.pivot_table(
        melted, index=column, columns="variable", values="value", aggfunc="sum"
    )
    table.to_csv("/mnt/10t/mzy/06profile/mag/RNAgene/" + column + ".csv")

数据清洗强烈不建议使用R:虽然Python(pandas)也不算快,但处理速度仍优于R。

R和pandas实现透视表(pivot; cast/dcast/acast)和逆透视表(melt)过程相关推荐

  1. Power BI中的透视列和逆透视

    透视列(Pivot)和逆透视列(Unpivot)是在Excel当中就经常使用的一对数据聚合和拆分方法,在Power BI桌面应用中也提供了同样的功能. 透视列操作是将列下所有的N个非重复数据转换成N个 ...

  2. 邻接表转化为逆邻接表

    题目描述: 已知有n个顶点的有向图G的邻接表,设计算法求邻接表G的逆邻接表. 思路: 将邻接表转化为逆邻接表需要遍历所有邻接表的整个顶点表G,然后便可得到每个顶点有哪些顶点指向它,然后将其信息放入逆邻 ...

  3. WEB数据透视表Pivot Table

    原文来自方案网 http://www.fanganwang.com/Product-detail-item-1451.html,欢迎转载. 关键字:透视表Pivot Table,交叉透视表,数据透视表 ...

  4. Pandas 统计分析基础 笔记5 _任务4.5 创建透视表与交叉表

    文章目录 pandas__任务4.5 创建透视表与交叉表 4.5 创建透视表与交叉表 代码4-67 使用订单号作为透视表索引制作透视表 代码 4-68 修改聚合函数后的透视表 代码 4-69 使用订单 ...

  5. 数据透视表与mysql_通过sql做数据透视表,数据库表行列转换(pivot和Unpivot用法)(一)...

    在mssql中大家都知道可以使用pivot来统计数据,实现像excel的透视表功能 一.MSsqlserver中我们通常的用法 1.Sqlserver数据库测试 ---创建测试表 Create tab ...

  6. python处理excel数据透视表_Python也能轻松做出Excel透视表的效果,一切技巧全在这里...

    此系列文章收录在公众号中:数据大宇宙 > 数据处理 >E-pd 经常听别人说 Python 在数据领域有多厉害,结果学了很长时间,连数据处理都麻烦得要死.后来才发现,原来不是 Python ...

  7. python数据逆透视_Python数据神器pandas,轻松搞定嵌套表头——透视与逆透视

    发现许多小伙伴入门Python几个月,还是低效率做数据处理.这套课程以形象的示意图,精心安排的案例,循序渐进带你玩转数据处理分析神器--pandas,课程中还有分析案例噢,干货满满! 前言 上一节我们 ...

  8. excel 透视表 vba_使用Excel VBA删除数据透视表计算字段

    excel 透视表 vba Yesterday, I started out with the best of intentions, planning to get some work done, ...

  9. php 生成excel透视表,利用Javascript仿Excel的数据透视分析功能

    什么是数据透视分析? 数据透视分析就是要在 不同维度对数据进行汇总,过滤,分析,比较,作图.用来发现数据的变化趋势和不同因素导致的差异. 这在销售,统计,金融 等方面十分有用,常常会在一些管理软件中使 ...

最新文章

  1. 打造生物智能和人工智能“双螺旋”,智源研究院发布“人工智能的认知神经基础”重大研究方向...
  2. spyder jupyter集成
  3. 文本文件与二进制文件及编码关系
  4. 《集体智慧编程》第九章
  5. ZYAR20A 亚克力2驱 蓝牙 298寻迹避障机器人 —— 安装过程
  6. java使用poi操作excel文件_使用 java apache poi 操作 excel xlsx 文件
  7. The Nighth Assignment
  8. pytorch minist
  9. QCC3024/QCC3020/QCC3034 蓝牙对讲
  10. Java 用两个队列实现一个栈
  11. FPGA产生m序列及其应用
  12. npm install 安装一直报错Error EPERM operation not permitted, mkdir
  13. 用Matplotlib实现世界GDP动态排名
  14. TCR宝藏级隐藏项目National Writing Board
  15. 有道云笔记客户端的下载和安装、使用(博主推荐)
  16. 1688电商API接口-无需多个 电商平台单独对接
  17. 微信公众平台与微信开放平台的区别、服务号、订阅号、企业微信的区别
  18. 比较快得Maven镜像
  19. 记一次 feign.FeignException: status 404 reading xxx 问题解决
  20. ssm+java农村快递代取平台52wxh(程序+lw+源码+远程部署)

热门文章

  1. 土地利用/覆被变化(LUCC)定义理解
  2. 计算机常用英语单词[转]
  3. SAP中的邮件通讯处理
  4. vmware NAT模式网络连接无法上网
  5. matlab fm非相干解调,FM调制和相干解调,非相干解调.pdf
  6. matlab如何读取.mat文件,matlab中读取mat文件
  7. 最简单dophinscheduler 集成datax步骤
  8. Redis-敲黑板划重点
  9. 【杂叙】北京住房公积金提取
  10. Html5 Api 实现浏览器全屏