Skip to content
Snippets Groups Projects
Commit dbd26bcf authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

star fusion docker agfusion file format

parent dcba19f7
Branches
Tags
No related merge requests found
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#!/usr/bin/perl -w
#svvcf2bed.pl
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = ();
my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h','datadir|r=s');
open OM, "<$opt{datadir}/known_genefusions.txt" or die $!;
while (my $line = <OM>) {
chomp($line);
$known{$line} = 1;
}
close OM;
open OM, "<$opt{datadir}/genelist.txt" or die $!;
while (my $line = <OM>) {
chomp($line);
$keep{$line} = 1;
}
my @exonfiles = `ls */*.exons.csv`;
foreach $efile (@exonfiles) {
chomp($efile);
my @leftexons;
my @rightexons;
my ($dir,$pair,@etc) = split(/\/|\./,$efile);
open EFILE, "<$efile" or die $!;
my $header = <EFILE>;
while (my $line = <EFILE>) {
my ($leftgene,$rightgene,$lefttrx,$righttrx,
$leftstrand,$rightstrand,$exonsrc,$exonnum,
$exon_chr,$exon_start,$exon_end) = split(/,/,$line);
if ($exonsrc =~ m/5/) {
push @leftexons, $exonnum;
}else {
push @rightexons, $exonnum;
}
}
if ($leftexons[0] eq $leftexons[-1]) {
$exonnuminfo{$dir}{leftgene} = $leftexons[0];
}else {
$exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]);
}
if ($rightexons[0] eq $rightexons[-1]) {
$exonnuminfo{$dir}{rightgene} = $rightexons[0];
}else {
$exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]);
}
}
open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!;
print OAS join("\t","FusionName","LeftGene","LeftBreakpoint",
"LeftGeneExons","LeftStrand","RightGene","RightBreakpoint",
"RightGeneExons","RightStrand","RNAReads","DNAReads",
"FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n";
my $sname = $opt{prefix};
open FUSION, "<$opt{fusion}" or die $!;
my $header = <FUSION>;
chomp($header);
$header =~ s/^#//;
my @hline = split(/\t/,$header);
while (my $line = <FUSION>) {
chomp($line);
my @row = split(/\t/,$line);
my %hash;
foreach my $i (0..$#row) {
$hash{$hline[$i]} = $row[$i];
}
my @filter;
my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint});
my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint});
$hash{LeftBreakpoint} = join(":",$left_chr,$left_pos);
$hash{RightBreakpoint} = join(":",$right_chr,$right_pos);
$hash{LeftStrand} = $left_strand;
$hash{RightStrand} = $right_strand;
$hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0];
$hash{RightGene} = (split(/\^/,$hash{RightGene}))[0];
unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}) {
push @filter, 'OutsideGeneList';
}
$hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount};
my $fname = join("--",$hash{LeftGene},$hash{RightGene});
my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene});
my $key = join("_",$hash{LeftGene},$left_pos,$hash{RightGene},$right_pos);
my ($leftexon,$rightexon);
if ($exonnuminfo{$key}) {
$leftexon = $exonnuminfo{$key}{leftgene};
$rightexon = $exonnuminfo{$key}{rightgene};
}
$hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME');
my ($dna_support,$rna_support)=("no") x 2;
$hash{annots} =~ s/CHROMOSOMAL\[/CHROMOSOMAL /;
$hash{annots} =~ s/\]|\[|\"//g;
@annots = split(/,/,$hash{annots});
my ($chrom_type_dist) = grep(/CHROMOSOMAL/,@annots);
my ($chrtype,$chrdist) = split(/ /,$chrom_type_dist);
@annot2 = grep(!/CHROMOSOMAL/, @annots);
$fusion_annot = '';
if (scalar(@annot2) > 0) {
$fusion_annot = join(",",@annot2);
}
if ($known{$fname2} || $fusion_annot =~ m/CCLE|Cosmic|FA_CancerSupp|Klijn_CellLines|Mitelman|YOSHIHARA_TCGA|chimer/i) {
push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 3);
}else {
push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 5);
}
$rna_support = "yes";
if ($left_chr eq $right_chr) {
$diff = abs($right_pos-$left_pos);
push @filter, 'ReadThrough' if ($diff < 200000);
}
my $qc ='PASS';
if (scalar(@filter) > 0) {
$qc = join(";","FailedQC",@filter);
}
print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
$hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
$hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$fusion_annot,$qc,$chrtype,$chrdist),"\n";
}
close OAS;
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -57,17 +57,22 @@ then
module load singularity/3.0.2
export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images"
cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
cut -f 8,10 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed
bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt
if [[ $filter == 1 ]]
then
perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt
fi
else
export PYENSEMBL_CACHE_DIR=/opt
refgeno=${index_path}/CTAT_resource_lib
STAR-Fusion --genome_lib_dir ${refgeno} --min_sum_frags 3 --CPU $NPROC --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion
cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt
cut -f 7-10 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "agfusion annotate -db agfusion.homo_sapiens.95.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
fi
if [[ $filter == 1 ]]
then
cut -f 8,10 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed
bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt
perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt
if [[ $filter == 1 ]]
then
perl $baseDir/filter_genefusions_docker.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt
fi
fi
File mode changed from 100644 to 100755
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment