# Split the final lncRNA annotation into known and novel subsets (cluster).

cd /disk/zhw/CRClncRNA/filter/lncRNA

for lnc_class in known novel; do
  # Keep every GTF line that contains the class label anywhere on the line.
  grep "${lnc_class}" lncRNA.final.v2.gtf >"lncRNA.final.v2.${lnc_class}.gtf"
done

# Convert both GTF subsets to sorted BED (original note: "zuo"; the working
# directory below is under the zzuo account).

cd /data1/users/zzuo/xulab/CRC_lncRNA/hongwan

for lnc_class in known novel; do
  gtf2bed <"lncRNA.final.v2.${lnc_class}.gtf" \
    | sort-bed - \
    >"lncRNA.final.v2.${lnc_class}.bed"
done

# RSEM preparation (cluster).

cd /disk/zhw/CRClncRNA/fastq_data/first

# list_sample_ids
# Prints one sample ID per line for every *_1.clean.fq.gz file in the current
# directory. A glob is used instead of parsing `ls`, and the suffix is removed
# literally from the end of the name (the previous sed pattern treated "." as
# a wildcard and was not anchored).
list_sample_ids() {
  local fq
  for fq in *_1.clean.fq.gz; do
    # An unmatched glob stays literal; skip it so the list is simply empty.
    [ -e "$fq" ] || continue
    printf '%s\n' "${fq%_1.clean.fq.gz}"
  done
}

list_sample_ids >SRR.txt

# NOTE(review): this call passes only -r and -g, while the CPAT runs further
# down in this file also pass -d, -x and -o, so it looks incomplete -- confirm
# before re-running. It is kept exactly as recorded.
/disk/soft/CPAT-1.2.3/bin/cpat.py -r /disk/database/human/hg38/Gencode/genome.fa -g /disk/zhw/CRClncRNA/filter/lncRNA/lncRNA.final.v2.known.bed

# Clean the raw paired-end reads with fastp.

cd /disk/zhw/CRClncRNA/fastq_data/sec

lane=D6_HKNNLCCXX_L4

fastp \
  -i "${lane}_1.fq.gz" \
  -o "${lane}_1.clean.fq.gz" \
  -I "${lane}_2.fq.gz" \
  -O "${lane}_2.clean.fq.gz" \
  -w 8

# Cluster.

# Build three-column BED files (chrom, start, end) from the GTF annotations.

cd /disk/zhw/CRClncRNA/filter/lncRNA

# gtf_to_bed3 GTF
# Writes the unique chrom/start/end triples of every feature line in GTF to
# stdout, tab-separated and sorted.
#
# GTF coordinates are 1-based and inclusive while BED is 0-based and half-open,
# so the start is shifted down by one. Copying the GTF start unchanged (as this
# step used to) drops the first base of every feature and turns single-base
# features into zero-length intervals. Header/comment lines starting with "#"
# and lines with fewer than five tab-separated fields are skipped.
gtf_to_bed3() {
  awk -F '\t' 'BEGIN { OFS = "\t" } !/^#/ && NF >= 5 { print $1, $4 - 1, $5 }' "$1" \
    | sort -u
}

gtf_to_bed3 lncRNA.final.v2.known.gtf >lncRNA.final.v2.known2.bed

gtf_to_bed3 lncRNA.final.v2.novel.gtf >lncRNA.final.v2.novel2.bed

gtf_to_bed3 protein_coding.final.gtf >protein_coding.final2.bed

# Conservation scores with bwtool (original note: "34fuwuq", presumably
# server 34 -- confirm).

cd /data2/zhw/CRC_lncRNA

bwtool_bin=/data/software/bwtool-master/bwtool
phastcons_bw=/data/database/hg38/hg38.phastCons100way.bw

# Perl filter: print a row unless field 3 minus field 2 equals 0
# (the $b counter is incremented but never read).
drop_zero_length='$b++;$a=$F[2]-$F[1];print $_ unless $a==0'

# Perl summary: split field 5 on commas, average the entries that do not match
# "NA", and print fields 1-4 followed by that mean ("NA" when nothing is left).
mean_per_region='my @G=split /,/,$F[4];my $a=0;my $b=0;for(@G){next if /NA/;$a+=$_;$b++}if($b==0){$c="NA"}else{$c=$a/$b}print join "\t", @F[0..3], $c'

# novel lncRNA
perl -wanle "$drop_zero_length" lncRNA.final.v2.novel2.bed >lncRNA.final.v2.novel3.bed
"$bwtool_bin" extract bed lncRNA.final.v2.novel3.bed "$phastcons_bw" novel_bwtools
perl -wanle "$mean_per_region" novel_bwtools >novel.mean.txt

# known lncRNA
# NOTE(review): the output name "knonw.mean.txt" is misspelled; it is kept
# as-is because scripts outside this file may read it under that name.
perl -wanle "$drop_zero_length" lncRNA.final.v2.known2.bed >lncRNA.final.v2.known3.bed
"$bwtool_bin" extract bed lncRNA.final.v2.known3.bed "$phastcons_bw" known_bwtools
perl -wanle "$mean_per_region" known_bwtools >knonw.mean.txt

# protein-coding
perl -wanle "$drop_zero_length" protein_coding.final2.bed >protein_coding.final3.bed
"$bwtool_bin" extract bed protein_coding.final3.bed "$phastcons_bw" protein_bwtools
perl -wanle "$mean_per_region" protein_bwtools >protein.mean.txt

# Merge the RSEM results (cluster).

cd /disk/zhw/CRClncRNA/filter/RSEM

# Each merge script is called with a number and an output name; the number is
# presumably the RSEM result column to collect -- confirm in the merge scripts.
for rna_type in pcRNA lncRNA; do
  for spec in 5:count 6:TPM 7:FPKM; do
    python "${rna_type}_RSEM_merge.py" "${spec%%:*}" "${rna_type}.rsem.${spec#*:}"
  done
done

cd /disk/zhw/CRClncRNA/filter/lncRNA

# Whitespace-separated fields 1, 2, 4, 5 and 10 of the GTF, with every double
# quote and semicolon removed, written tab-separated.
awk '{print $1,$2,$4,$5,$10}' lncRNA.final.v2.gtf \
  | tr -d '";' \
  | tr ' ' '\t' \
  >lncRNA.final.v2.cp.2.txt

# Draw the circos plot.

# Classification: run distinguish_known_novel.R from the D:\CRC_lncRNA
# directory first.

# (original note: node1, root)

# Test run: first 1000 rows of every expression table, fields 1-4 only,
# written to the current directory.
for grp in normal_know normal_novel unrecurrence_know unrecurrence_novel recurrence_novel recurrence_know; do
  head -n 1000 "hs_exp_data/hs_${grp}_gtf_data.txt" \
    | awk '{print $1,$2,$3,$4}' \
    >"hs_${grp}_gtf_data3.txt"
done

/disk/soft/circos-0.69-6/bin/circos -conf circoshhj_test.conf

# Full run: fields 1-4 of every expression table, written next to the inputs.
for grp in normal_know normal_novel unrecurrence_know unrecurrence_novel recurrence_novel recurrence_know; do
  awk '{print $1,$2,$3,$4}' "hs_exp_data/hs_${grp}_gtf_data.txt" \
    >"hs_exp_data/hs_${grp}_gtf_data2.txt"
done

/disk/soft/circos-0.69-6/bin/circos -conf circoshhj.conf

# Coding-potential analysis.

# (cluster node 2)

# Convert each GTF to a transcript FASTA file.

cd /disk/zhw/CRClncRNA/filter/lncRNA

genome_fa=/disk/database/human/hg38/Gencode/genome.fa

for annot in lncRNA.final.v2.known lncRNA.final.v2.novel protein_coding.final; do
  # First gffread pass re-emits the annotation on stdout; the second pass
  # reads it from stdin and writes the sequences with -w / -W.
  gffread "${annot}.gtf" -o- \
    | gffread - -g "$genome_fa" -w "${annot}.fa" -W
done

# CPAT: score each FASTA with the human logit model and hexamer table.

cd /disk/zhw/CRClncRNA/CPAT

fasta_dir=/disk/zhw/CRClncRNA/filter/lncRNA

for annot in protein_coding.final lncRNA.final.v2.novel lncRNA.final.v2.known; do
  cpat.py \
    -g "${fasta_dir}/${annot}.fa" \
    -d Human_logitModel.RData \
    -x Human_Hexamer.tsv \
    -o "CPAT_${annot}"
done

# CNCI: run on the same three FASTA files.

cd /disk/zhw/CRClncRNA/CNCI

fasta_dir=/disk/zhw/CRClncRNA/filter/lncRNA

for annot in lncRNA.final.v2.novel lncRNA.final.v2.known protein_coding.final; do
  python /disk/soft/CNCI-master/CNCI.py \
    -f "${fasta_dir}/${annot}.fa" \
    -o "CNCI_${annot}" \
    -m ve \
    -p 4
done

# bwtool: phastCons conservation per interval (cluster copy of the workflow).

cd /disk/zhw/CRClncRNA/bwtools

bed_dir=/disk/zhw/CRClncRNA/filter/lncRNA
phastcons_bw=/disk/database/human/hg38/hg38.phastCons100way.bw

# Perl filter: print a row unless field 3 minus field 2 equals 0
# (the $b counter is incremented but never read).
drop_zero_length='$b++;$a=$F[2]-$F[1];print $_ unless $a==0'

# Perl summary: split field 5 on commas, average the entries that do not match
# "NA", and print fields 1-4 followed by that mean ("NA" when nothing is left).
mean_per_region='my @G=split /,/,$F[4];my $a=0;my $b=0;for(@G){next if /NA/;$a+=$_;$b++}if($b==0){$c="NA"}else{$c=$a/$b}print join "\t", @F[0..3], $c'

# Remove the zero-length intervals.
for annot in lncRNA.final.v2.known lncRNA.final.v2.novel protein_coding.final; do
  perl -wanle "$drop_zero_length" "${bed_dir}/${annot}2.bed" >"${annot}_filt.bed"
done

# Extract the per-base scores for every remaining interval.
for annot in lncRNA.final.v2.known lncRNA.final.v2.novel protein_coding.final; do
  /disk/soft/bwtool/bwtool extract bed "${annot}_filt.bed" "$phastcons_bw" "$annot"
done

# Average the scores of each interval.
for annot in protein_coding.final lncRNA.final.v2.known lncRNA.final.v2.novel; do
  perl -wanle "$mean_per_region" "$annot" >"bwtool_${annot}_mean.txt"
done

# Find the protein-coding genes near each lncRNA: start by listing the lncRNA
# gene identifiers.

# Fixed: the directory was misspelled "lncRN", so the cd failed and the
# extraction ran in whatever directory was current. Every other step in this
# file uses /disk/zhw/CRClncRNA/filter/lncRNA.
cd /disk/zhw/CRClncRNA/filter/lncRNA

# gtf_gene_ids GTF
# Prints the 10th whitespace-separated field of every line of GTF with double
# quotes and semicolons removed, collapsing runs of identical adjacent values.
gtf_gene_ids() {
  awk '{print $10}' "$1" | tr -d '";' | uniq
}

gtf_gene_ids lncRNA.final.v2.gtf >lncRNA.final.v2.gtf.genename.txt

#画ecdf图

#运行 D:\CRC_lncRNA\coding_potential\coding_potential_ecdf.R

# Plot the density curves with these scripts (Windows paths, not shell commands):

# D:\CRC_lncRNA\filter\lncRNA\\exon_length.py

# D:\CRC_lncRNA\filter\lncRNA\\length_desitny.R

#差异表达

# Expression box plot: run the script below from this Windows directory (not shell commands):

# D:\CRC_lncRNA\filter\RSEM_expression

# boxplot.R

#kegg富集分析

#excell 分割出基因symbol,david分析,excell画图

#### CNV

cd /disk/zhw/CRClncRNA/filter/lncRNA

# attr_column FILE N
# Prints whitespace-separated field N of every line of FILE, removes each `";`
# and then any remaining `"`, and collapses runs of identical adjacent values.
attr_column() {
  awk -v col="$2" '{ print $col }' "$1" \
    | sed -e 's/";//g' -e 's/"//g' \
    | uniq
}

for lnc_class in novel known; do
  prefix="lncRNA.final.v2.${lnc_class}"

  # Version-sort by chromosome, then numerically by start, and drop every row
  # whose first field contains "_". The result is written straight to
  # *.sorted.bed, which is where the previous sorted2 + mv sequence ended up.
  sort -k 1V,1 -k 2n,2 "${prefix}.bed" \
    | awk '$1 !~ /_/' \
    >"${prefix}.sorted.bed"

  # All gene IDs (field 11): adjacent-unique list first, then the fully
  # de-duplicated, sorted list.
  attr_column "${prefix}.sorted.bed" 11 >"${prefix}.geneid2.txt"
  sort "${prefix}.geneid2.txt" | uniq >"${prefix}.geneid.txt"

  # All transcript IDs (field 13).
  # Fixed: this step used to read *.sorted2.bed, which no longer existed
  # because it had just been renamed to *.sorted.bed.
  attr_column "${prefix}.sorted.bed" 13 >"${prefix}.transcriptid.txt"
done

cd /disk/zhw/CRClncRNA/cnv

lnc_dir=/disk/zhw/CRClncRNA/filter/lncRNA

# Reorder the GISTIC score columns into a tab-separated BED-like layout:
# "chr" + field 2, field 3, field 4, field 1, field 8.
awk 'BEGIN { OFS = "\t" } { print "chr"$2, $3, $4, $1, $8 }' scores.gistic >scores.gistic.bed

# Lift the intervals over with the hg19ToHg38 chain file, then sort them.
/disk/soft/CrossMap-0.2.7/bin/CrossMap.py bed \
  /disk/database/human/convert_chain/hg19ToHg38.over.chain.gz \
  scores.gistic.bed \
  19_38scores.gistic

sort -k 1V,1 -k 2n,2 19_38scores.gistic >19_38scores.sorted.gistic

# Split into Amp and Del tables (fields 1, 2, 3, 5) and rename chr -> hs.
for cnv_type in Amp Del; do
  grep "$cnv_type" 19_38scores.sorted.gistic \
    | awk '{print $1,$2,$3,$5}' \
    | sed 's/chr/hs/g' \
    >"${cnv_type}_19_38scores.sorted.gistic"
done

# Intersect the lifted scores with the novel and known lncRNA intervals.
for lnc_class in novel known; do
  bedtools intersect \
    -a 19_38scores.sorted.gistic \
    -b "${lnc_dir}/lncRNA.final.v2.${lnc_class}.sorted.bed" \
    -wb \
    >"${lnc_class}_scores.gistic.bed"
done

#circos图使用,不要了

#cat novel_scores.gistic.bed |grep "Amp"|awk {'print $1,$7,$8,$5*100'}|sed 's/chr/hs/g'>Amp_novel_scores.gistic.bed

#cat novel_scores.gistic.bed |grep "Del"|awk {'print $1,$7,$8,$5*100'}|sed 's/chr/hs/g'>Del_novel_scores.gistic.bed

#cat known_scores.gistic.bed |grep "Amp"|awk {'print $1,$7,$8,$5*100'}|sed 's/chr/hs/g'>Amp_known_scores.gistic.bed

#cat known_scores.gistic.bed |grep "Del"|awk {'print $1,$7,$8,$5*100'}|sed 's/chr/hs/g'>Del_known_scores.gistic.bed

#取位点中点和+1

#le novel_scores.gistic.bed |grep "Amp"| perl -wanle'$mean = int($F[6]+$F[7]);$mean2 = $mean+1;$haha=$F[4]*100;$F[0]=~s/chr/hs/;print "$F[0] $mean $mean2 $haha"'|uniq>Amp_novel_scores.gistic.bed

#le novel_scores.gistic.bed |grep "Del"| perl -wanle'$mean = int($F[6]+$F[7]);$mean2 = $mean+1;$haha=$F[4]*100;$F[0]=~s/chr/hs/;print "$F[0] $mean $mean2 $haha"'|uniq>Del_novel_scores.gistic.bed

#le known_scores.gistic.bed |grep "Amp"| perl -wanle'$mean = int($F[6]+$F[7]);$mean2 = $mean+1;$haha=$F[4]*100;$F[0]=~s/chr/hs/;print "$F[0] $mean $mean2 $haha"'|uniq>Amp_known_scores.gistic.bed

#le known_scores.gistic.bed |grep "Del"| perl -wanle'$mean = int($F[6]+$F[7]);$mean2 = $mean+1;$haha=$F[4]*100;$F[0]=~s/chr/hs/;print "$F[0] $mean $mean2 $haha"'|uniq>Del_known_scores.gistic.bed

# Draw the CNV circos plot.

circos_workdir=/disk/zhw/CRClncRNA/cnv/circos
cd "$circos_workdir"

/disk/soft/circos-0.69-6/bin/circos -conf circoscnv.conf

#test:

#/disk/soft/circos-0.69-6/bin/circos -conf circos2.conf

#head -1000 novel_scores.gistic.bed |grep "Amp"|awk {'print $1,$2,$3,$5*100'}|sed 's/chr/hs/g'>Amp_novel_scores.gistic2.bed

#head -1000 known_scores.gistic.bed |grep "Amp"|awk {'print $1,$2,$3,$5*100'}|sed 's/chr/hs/g'>Amp_known_scores.gistic2.bed

#head -1000 novel_scores.gistic.bed |grep "Del"|awk {'print $1,$2,$3,$5*100'}|sed 's/chr/hs/g'>Del_novel_scores.gistic2.bed

#head -1000 known_scores.gistic.bed |grep "Del"|awk {'print $1,$2,$3,$5*100'}|sed 's/chr/hs/g'>Del_known_scores.gistic2.bed

# Keep the rows whose percentage exceeds 25 (input for the bar chart and the
# overlap analysis).
# NOTE(review): the inputs are relative paths; novel_scores.gistic.bed and
# known_scores.gistic.bed are written in /disk/zhw/CRClncRNA/cnv earlier in
# this file, so this section is presumably run from there -- confirm.

for lnc_class in novel known; do
  # Rows whose 5th tab-separated field is greater than 0.25.
  awk -F"\t" '$5 > 0.25' "${lnc_class}_scores.gistic.bed" \
    >"percentages25${lnc_class}_scores.gistic.bed"
done

# Transcript IDs of the novel/known lncRNAs (whitespace field 18).
for lnc_class in novel known; do
  awk '{print $18}' "percentages25${lnc_class}_scores.gistic.bed" \
    | sed -e 's/";//g' -e 's/"//g' \
    | uniq \
    >"percentages25${lnc_class}.transcriptid.txt"
done

# Gene IDs of the novel/known lncRNAs (whitespace field 16).
for lnc_class in novel known; do
  awk '{print $16}' "percentages25${lnc_class}_scores.gistic.bed" \
    | sed -e 's/";//g' -e 's/"//g' \
    | uniq \
    >"percentages25${lnc_class}.geneid.txt"
done

# Gene ID plus CNV frequency for novel and known, Amp and Del kept apart:
# fields 1-5 and 16, quotes stripped, sorted and de-duplicated.
for lnc_class in novel known; do
  for cnv_type in Amp Del; do
    grep "$cnv_type" "${lnc_class}_scores.gistic.bed" \
      | awk '{print $1,$2,$3,$4,$5,$16}' \
      | sed -e 's/";//g' -e 's/"//g' \
      | sort \
      | uniq \
      >"rep_all_${lnc_class}.${cnv_type}.geneid_precent.gistic"
  done
done

#python获取到没有比对部分,合并两个文件#获取到有cnv的 取到没有CNV变异的复制为0 合并 #排序

#cd /disk/zhw/CRClncRNA/cnv/heatmap

#perl uniq_mean.pl ../rep_all_novel.Amp.geneid_precent.gistic >rep_all_novel.Amp.geneid_precent_gene_num.gistic

#python addzero_lncRNA2.py /disk/zhw/CRClncRNA/cnv/rep_all_novel.Amp.geneid_precent.gistic novel_Amp_no_bedtools_lncRNA.bed

#cat rep_all_novel.Amp.geneid_precent_gene_num.gistic novel_Amp_no_bedtools_lncRNA.bed > all_novel.Amp.geneid_precent2.gistic

#sort -k1,1V -k2,2n all_novel.Amp.geneid_precent2.gistic>all_novel.Amp.geneid_precent2_dorted.gistic3

#Del

#perl uniq_mean.pl ../rep_all_novel.Del.geneid_precent.gistic >rep_all_novel.Del.geneid_precent_gene_num.gistic

#python addzero_lncRNA2.py /disk/zhw/CRClncRNA/cnv/rep_all_novel.Del.geneid_precent.gistic novel_Del_no_bedtools_lncRNA.bed

#cat rep_all_novel.Del.geneid_precent_gene_num.gistic novel_Del_no_bedtools_lncRNA.bed > all_novel.Del.geneid_precent2.gistic

#sort -k1,1V -k2,2n all_novel.Del.geneid_precent2.gistic>all_novel.Del.geneid_precent2_dorted.gistic3

#perl uniq_mean.pl all_novel.Del.geneid_precent2.gistic >all_novel.Del.geneid_precent_gene_num.gistic

#perl uniq_mean.pl all_known.Amp.geneid_precent2.gistic >all_known.Amp.geneid_precent_gene_num.gistic

#perl uniq_mean.pl all_known.Del.geneid_precent2.gistic >all_known.Del.geneid_precent_gene_num.gistic

# list_name_prefixes
# Prints, for every entry in the current directory, the part of its name that
# precedes the first "_" (the whole name when there is no "_"). A glob is used
# instead of parsing `ls` output, and an empty directory yields no output.
# NOTE(review): the working directory is not set here; presumably this is run
# inside the directory holding the per-cancer GISTIC files (names such as
# BRCA_scores.gistic) -- confirm.
list_name_prefixes() {
  local entry
  for entry in *; do
    # An unmatched glob stays literal; skip it (but keep dangling symlinks,
    # which `ls` would also have listed).
    [ -e "$entry" ] || [ -L "$entry" ] || continue
    printf '%s\n' "${entry%%_*}"
  done
}

list_name_prefixes >../cancer.txt

# Build the matrix.

sh foralllcancer.sh

# Turn the Del values into negative numbers.

sh Del_negetive_num.sh

# Merge the known and novel tables.

sh bind_knownnovel.sh

# Run the R script D:\CRC_lncRNA\cnv\percentCNV\allcancer_chro_heatmap.R

# Gene IDs (whitespace field 16) of the >25% rows, split by CNV type.
for lnc_class in novel known; do
  for cnv_type in Del Amp; do
    grep "$cnv_type" "percentages25${lnc_class}_scores.gistic.bed" \
      | awk '{print $16}' \
      | sed -e 's/";//g' -e 's/"//g' \
      | uniq \
      >"percentages25${lnc_class}.${cnv_type}.geneid.txt"
  done
done

# Concatenate the novel and known lists for each CNV type.
for cnv_type in Del Amp; do
  cat "percentages25novel.${cnv_type}.geneid.txt" "percentages25known.${cnv_type}.geneid.txt" \
    >"percentages25.${cnv_type}.geneid.txt"
done

#运行D:\CRC_lncRNA\diffexp\gtf_bind_same_lncNRA.py

#先将D:\CRC_lncRNA\filter\lncRNA\lncRNA.final.v2.gtf文件only_min_max_position_lncRNA.final.v2.gtf,只有最大最小的位置,将一个lncRNA多个位置合并为一个

#运行:D:\CRC_lncRNA\diffexp\different_gene_position_log2FoldChange_for_circos.py

#cat rec_DESeq2_edgeR_up_gene_for_circos.txt |sort -k 1V,1 -k 2n,2 |uniq >uniq_rec_DESeq2_edgeR_up_gene_for_circos.txt

#cat rec_DESeq2_edgeR_down_gene_for_circos.txt |sort -k 1V,1 -k 2n,2 |uniq >uniq_rec_DESeq2_edgeR_down_gene_for_circos.txt

#cat normal_DESeq2_edgeR_up_gene_for_circos.txt |sort -k 1V,1 -k 2n,2 |uniq >uniq_normal_DESeq2_edgeR_up_gene_for_circos.txt

#cat normal_DESeq2_edgeR_down_gene_for_circos.txt |sort -k 1V,1 -k 2n,2 |uniq >uniq_normal_DESeq2_edgeR_down_gene_for_circos.txt

circos_bin=/disk/soft/circos-0.69-6/bin/circos

"$circos_bin" -conf circoscnvright.conf

# Small subsets for a test render: first 100 rows of every differential-gene
# table ...
for sample_group in rec normal; do
  for direction in up down; do
    stem="uniq_${sample_group}_DESeq2_edgeR_${direction}_gene_for_circos"
    head -n 100 "${stem}.txt" >"${stem}2.txt"
  done
done

# ... and the first 10000 rows of the Del and Amp score tables one level up.
for cnv_type in Del Amp; do
  head -n 10000 "../${cnv_type}_19_38scores.sorted.gistic" \
    >"../${cnv_type}_19_38scores.sorted.gistic2"
done

"$circos_bin" -conf circoscnvtest.conf

# Heat map of the differential lncRNAs across 13 cancer types: up-regulated on
# top, down-regulated below, ordered by logFC.

# Intersect the BED file with the GISTIC table to get a score for every
# differential lncRNA (fields 1-5 and 9 of the intersection, space-separated).

gistic_hg38=/disk/zhw/CRClncRNA/cnv/19_38scores.sorted.gistic

bedtools intersect -a "$gistic_hg38" -b intersect_normal_cnv_up_down_gtf.bed -wb \
  | awk '{print $1,$2,$3,$4,$5,$9}' \
  | tr '\t' ' ' \
  >intersect_normal_cnv_up_down_scores.gistic.bed

dos2unix intersect_normal_cnv_up_down_scores.gistic.bed

# Where several rows map to the same entry, merge them by taking the mean.

perl /disk/zhw/CRClncRNA/cnv/CNV_gistic_data/uniq_mean.pl \
  /disk/zhw/CRClncRNA/cnv/differentgene_updown_heatmap/intersect_normal_cnv_up_down_scores.gistic.bed \
  >uniq_intersect_normal_cnv_up_down_scores.gistic.bed

lnc_dir=/disk/zhw/CRClncRNA/filter/lncRNA

# Combined novel + known interval and gene-ID files.
# NOTE(review): the first output name contains "knoiwn" (sic); it is kept
# unchanged because scripts outside this file may read it under that name.
cat "${lnc_dir}/lncRNA.final.v2.novel.sorted.bed" "${lnc_dir}/lncRNA.final.v2.known.sorted.bed" \
  >"${lnc_dir}/lncRNA.final.v2.novel_knoiwn.sorted.bed"

cat "${lnc_dir}/lncRNA.final.v2.novel.geneid.txt" "${lnc_dir}/lncRNA.final.v2.known.geneid.txt" \
  >"${lnc_dir}/lncRNA.final.v2.novel_known.geneid.txt"

#############################

# Single-cancer workflow: the per-cancer steps written out for one cancer type.
#
# Fixed: the recorded commands mixed three prefixes (BRCA for the first steps,
# UVM for the concatenation, UCEC for the sign flip), so the later steps read
# files that the earlier steps never wrote. Every file name is now derived
# from $cancer; change it to process another cancer type.
cancer=BRCA

gistic_data=/disk/zhw/CRClncRNA/cnv/CNV_gistic_data
diff_bed=/disk/zhw/CRClncRNA/cnv/differentgene_updown_heatmap/intersect_normal_cnv_up_down_gtf.bed

# negate_scores FILE
# Prints the first four whitespace-separated fields of every line of FILE,
# tab-separated, with "-" prepended to the fourth field.
negate_scores() {
  awk '{print $1"\t"$2"\t"$3"\t-"$4}' "$1"
}

# Reorder the GISTIC columns: "chr" + field 2, field 3, field 4, field 1, field 8.
awk 'BEGIN { OFS = "\t" } { print "chr"$2, $3, $4, $1, $8 }' \
  "${gistic_data}/gistic/${cancer}_scores.gistic" \
  >"${cancer}_scores.gistic.bed"

# Lift over with the hg19ToHg38 chain file and sort.
/disk/soft/CrossMap-0.2.7/bin/CrossMap.py bed \
  /disk/database/human/convert_chain/hg19ToHg38.over.chain.gz \
  "${cancer}_scores.gistic.bed" \
  "${cancer}.19_38scores.gistic"

sort -k 1V,1 -k 2n,2 "${cancer}.19_38scores.gistic" >"${cancer}.19_38scores.sorted.gistic"

# Intersect with the differential-lncRNA intervals. This deliberately reuses
# the ${cancer}_scores.gistic.bed name, as the recorded commands did; the
# pre-liftover table written above has already been consumed by CrossMap.
bedtools intersect -a "${cancer}.19_38scores.sorted.gistic" -b "$diff_bed" -wb \
  | awk '{print $1,$2,$3,$4,$5,$9}' \
  | tr '\t' ' ' \
  >"${cancer}_scores.gistic.bed"

dos2unix "${cancer}_scores.gistic.bed"

for cnv_type in Amp Del; do
  # Rows of this CNV type, quotes stripped, sorted and de-duplicated.
  grep "$cnv_type" "${cancer}_scores.gistic.bed" \
    | sed -e 's/";//g' -e 's/"//g' \
    | sort \
    | uniq \
    >"${cancer}.rep_all.${cnv_type}.geneid_precent.gistic"

  # Collapse repeated entries with uniq_mean.pl.
  perl "${gistic_data}/uniq_mean.pl" "${cancer}.rep_all.${cnv_type}.geneid_precent.gistic" \
    >"${cancer}.rep_all_${cnv_type}.geneid_precent_gene_num.gistic"

  # NOTE(review): the second argument is presumably the file addzero_lncRNA2.py
  # writes (lncRNAs without any overlap) -- confirm in that script.
  python addzero_lncRNA2.py \
    "${cancer}.rep_all_${cnv_type}.geneid_precent_gene_num.gistic" \
    "${cancer}.${cnv_type}_no_bedtools_lncRNA.bed"

  cat "${cancer}.rep_all_${cnv_type}.geneid_precent_gene_num.gistic" \
    "${cancer}.${cnv_type}_no_bedtools_lncRNA.bed" \
    >"${cancer}.res_novel_known.${cnv_type}.geneid_precent_sorted.gistic"
done

# Turn the Del values negative.
negate_scores "${cancer}.res_novel_known.Del.geneid_precent_sorted.gistic" \
  >"${cancer}.res_novel_known.Del.geneid_precent_sorted2.gistic"

##############################

# Single run.

sh pineline.sh

# Batch run.

batch_dir=/disk/zhw/CRClncRNA/cnv/differentgene_updown_heatmap
cd "$batch_dir"

# foralllcancer13.sh first, then Del_negetive_num.sh, which turns the Del
# values into negative numbers.
for step_script in foralllcancer13.sh Del_negetive_num.sh; do
  sh "$step_script"
done

一键复制

编辑

Web IDE

原始数据

按行查看

历史

python lncrna_分析指令备份.sh相关推荐

  1. Python股票分析系列——数据整理和绘制.p2

    Python股票分析系列--数据整理和绘制.p2 欢迎来到Python for Finance教程系列的第2部分. 在本教程中,我们将利用我们的股票数据进一步分解一些基本的数据操作和可视化. 我们将要 ...

  2. Python数据采集分析告诉你为何上海二手房你都买不起

    感谢关注Python爱好者社区公众号,在这里,我们会每天向您推送Python相关的文章实战干货. 来吧,一起Python. 对商业智能BI.大数据分析挖掘.机器学习,python,R等数据领域感兴趣的 ...

  3. python性能分析(一)——使用timeit给你的程序打个表吧

    前言 我们可以通过查看程序核心算法的代码,得知核心算法的渐进上界或者下界,从而大概估计出程序在运行时的效率,但是这并不够直观,也不一定十分靠谱(在整体程序中仍有一些不可忽略的运行细节在估计时被忽略了) ...

  4. Python 量化分析ETF指数基金投资

    Python 量化分析ETF指数基金. 标签(空格分隔): python 量化 ETF tushare pandas 文章目录 Python 量化分析ETF指数基金. 数据获取 数据分析 在喜马拉雅上 ...

  5. Python音频信号分析

    Python音频信号分析 一.录制音频 1.定义 2.录音 二.处理音频 1.批量读取.wav文件名 2.读取文件 3.写入文件 4.播放音频 一.录制音频 1.定义 (1)定义数据流 CHUNK = ...

  6. 命名管道 win7未响应_大数据分析Python建立分析数据管道

    如果您曾经想通过流数据或快速变化的数据在线学习Python,那么您可能会熟悉数据管道的概念.数据管道允许您通过一系列步骤将数据从一种表示形式转换为另一种表示形式.数据管道是数据工程的关键部分,我们将在 ...

  7. python视频口碑佳_从万众期待到口碑扑街!用Python来分析一下大家对唐探3的评论...

    原标题:从万众期待到口碑扑街!用Python来分析一下大家对唐探3的评论 来源 |菜鸟学Python 作者 |菜鸟哥 唐人街探案系列题材,凭借着演员出色的表演,以及精彩的探案故事,近些年来成为了一部很 ...

  8. python实现mongodb的备份与导入

    python实现mongodb的备份与导入 背景: 192.168.122.1    python 192.168.122.11  mongodb Python备份mongo 代码: # -*- co ...

  9. Python股票分析系列——基础股票数据操作(二).p4

    该系列视频已经搬运至bilibili: 点击查看 欢迎来到Python for Finance教程系列的第4部分.在本教程中,我们将基于Adj Close列创建烛台/ OHLC图,这将允许我介绍重新采 ...

最新文章

  1. html 简单机器人对话页面,简单的js聊天机器人框架BotUI
  2. 通过例子10分钟快速看懂pad_sequence、pack_padded_sequence以及pad_packed_sequence
  3. Java 内存模型与线程
  4. 数据库连接客户端使用(db2,oracle,mysql)
  5. FreeRTOS — 消息队列
  6. 使用idea编写代码作为生产者,Kafka接收其发来的信息【小案例】(一)
  7. 拥抱.NET Core,跨平台的轻量级RPC:Rabbit.Rpc
  8. linux关闭交互模式,linux – 关闭cp(copy)命令的交互模式(cp:overwrite?)
  9. FLY主题下载插件兼容php7适配emlog6.1.1
  10. poj 1094 Sorting It All Out 很好的拓扑排序,让我对拓扑排序有了一个很好的写法!!!
  11. 在框架中用JS设置target(用于location.href)
  12. 再回首Java第二十二天
  13. FPGA vs ASIC
  14. 1901~2100年节气表
  15. 军用产品环境可靠性试验-环境适应性检测
  16. golang cond
  17. 电脑鸿蒙系统怎么连接无线网络,手提电脑怎样连接WiFi?
  18. 从网易云音乐网页版无登陆下载MP3的办法
  19. [PyTroch系列-3]:PyTorch基础 - Hello World程序与张量(Tensor)概述
  20. error setting certificate verify locations: CAfile: F:/gitt/Git/mingw64/ssl/certs/ca-bundle.crt

热门文章

  1. Bootstrap 第一天
  2. 【转】Pro Android学习笔记(二五):用户界面和控制(13):LinearLayout和TableLayout...
  3. Windows7 USB/DVD Download Tool – U盘安装Win7工具
  4. 8位深, 16位深,24位深,32位深图片显示原理及对比
  5. 在手机里输入八卦及64卦符号(老年教程)
  6. Ubuntu之重新安装软件
  7. Win10 WSL adb使用
  8. 查看高通kernel用哪个dsti
  9. Android usb audio录音(四)
  10. 802.11 monitor模式