Skip to content
Snippets Groups Projects
Commit d1916729 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

move fpkm subset to genect

parent 12f57bca
No related merge requests found
#!/bin/bin/perl
use strict;
use warnings;
use diagnostics;
use Getopt::Long;
my ($genelist,$fpkm_file) = ("") x 2;
GetOptions( 'g|genes=s' => \$genelist,
'f|fpkm=s' => \$fpkm_file);
my $fpkm_out = $fpkm_file;
$fpkm_out =~ s/\.fpkm.+txt/\.fpkm.capture.txt/;
open OUT, ">$fpkm_out" or die $!;
#gene_info.human.txt file is required for gene alias information
open GENEINFO, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!;
my %geneinfo;
while(my $gline = <GENEINFO>){
chomp $gline;
my ($taxid,$geneid,$symbol,$locus,$syn,@info) = split("\t",$gline);
if($syn ne '-'){
my @synonyms = split(/\|/,$syn);
$geneinfo{$symbol} = $syn;
foreach my $synonym(@synonyms){
$geneinfo{$synonym} = $symbol;
}
}
}
#genelist is the list of genes in panel. Not specifying genelist will print out all lines(e.g. wholernaseq)
my %genes;
if($genelist ne ""){
open GENES, "<$genelist" or die $!;
while (my $gene = <GENES>){
chomp $gene;
$genes{$gene} = 1;
}
close GENES;
}
my $genelist_size = keys %genes;
#FPKM data
open FPKM, "<$fpkm_file" or die $!;
my %fpkmdata;
my %found;
my $header = <FPKM>;
chomp $header;
print OUT $header."\n";
while(my $line = <FPKM>){
chomp $line;
my ($gid,$gname,$ref,$strand,$start,$end,$cov,$fpkm,$tpm) = split("\t",$line);
if($ref =~ /^chr/){
if(!exists $fpkmdata{$gname}){$fpkmdata{$gname} = $line;}
else{$fpkmdata{$gname} = $fpkmdata{$gname}."\n".$line;}
}
if ($genelist_size == 0){ #wholernaseq
print OUT $line."\n";
}
elsif($genes{$gname} and $ref =~ /^chr/){ #panel gene found in fpkm data
print OUT $line."\n";
$found{$gname} =1;
}
}
foreach my $gene_key (keys %genes){
if($found{$gene_key}){}
else { #gene from gene list is not in fpkm data or has a different name
if($geneinfo{$gene_key}){ #gene is in gene_info.human.txt
if($fpkmdata{$geneinfo{$gene_key}}){ #gene from gene list is an alias in gene_info.human.txt
print OUT $fpkmdata{$geneinfo{$gene_key}}."\n";
}
elsif($geneinfo{$gene_key} =~ /\|/){
my @alias_array = split(/\|/, $geneinfo{$gene_key});
foreach my $alias(@alias_array){
if($fpkmdata{$alias}){print OUT $fpkmdata{$alias}."\n";}#gene from genelist is a Gene Name in gene_info.human.txt but fpkm data has an alias
}
}
}
}
}
close FPKM;
close OUT;
......@@ -11,13 +11,14 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:g:p:s:h opt
while getopts :b:g:p:f:s:h opt
do
case $opt in
b) sbam=$OPTARG;;
g) gtf=$OPTARG;;
p) pair_id=$OPTARG;;
s) stranded=$OPTARG;;
f) filter=$OPTARG;;
h) usage;;
esac
done
......@@ -48,4 +49,11 @@ featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_nam
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${sbam} -p $NPROC -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
if [[ -f $filter ]]
then
cd ..
mv ${pair_id}.fpkm.txt ${prefix}.fpkm.ori.txt
perl ${baseDir}/fpkm_subset_panel.pl -f ${prefix}.fpkm.ori.txt -g $filter
mv ${prefix}.fpkm.capture.txt ${pair_id}.fpkm.txt
fi
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment