From dbd26bcfed518b816be336924261601995c4b283 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Sun, 2 Aug 2020 14:41:47 -0500 Subject: [PATCH] star fusion docker agfusion file format --- alignment/bam2tdf.sh | 0 alignment/bamqc.sh | 0 alignment/dnaseqalign.sh | 0 alignment/filter_genefusions_docker.pl | 127 +++++++++++++++++++++++++ alignment/filter_viral_idxstats.pl | 0 alignment/hisat_genotype.sh | 0 alignment/indexbams.sh | 0 alignment/markdups.sh | 0 alignment/rnaseqalign.sh | 0 alignment/starfusion.sh | 15 ++- alignment/virusalign.sh | 0 11 files changed, 137 insertions(+), 5 deletions(-) mode change 100644 => 100755 alignment/bam2tdf.sh mode change 100644 => 100755 alignment/bamqc.sh mode change 100644 => 100755 alignment/dnaseqalign.sh create mode 100755 alignment/filter_genefusions_docker.pl mode change 100644 => 100755 alignment/filter_viral_idxstats.pl mode change 100644 => 100755 alignment/hisat_genotype.sh mode change 100644 => 100755 alignment/indexbams.sh mode change 100644 => 100755 alignment/markdups.sh mode change 100644 => 100755 alignment/rnaseqalign.sh mode change 100644 => 100755 alignment/starfusion.sh mode change 100644 => 100755 alignment/virusalign.sh diff --git a/alignment/bam2tdf.sh b/alignment/bam2tdf.sh old mode 100644 new mode 100755 diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh old mode 100644 new mode 100755 diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh old mode 100644 new mode 100755 diff --git a/alignment/filter_genefusions_docker.pl b/alignment/filter_genefusions_docker.pl new file mode 100755 index 0000000..6959c6c --- /dev/null +++ b/alignment/filter_genefusions_docker.pl @@ -0,0 +1,127 @@ +#!/usr/bin/perl -w +#svvcf2bed.pl + +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); + +my %opt = (); +my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h','datadir|r=s'); + +open OM, "<$opt{datadir}/known_genefusions.txt" or die $!; +while (my $line = <OM>) { + chomp($line); + $known{$line} = 1; +} +close OM; + +open OM, "<$opt{datadir}/genelist.txt" or die $!; +while (my $line = <OM>) { + chomp($line); + $keep{$line} = 1; +} + +my @exonfiles = `ls */*.exons.csv`; +foreach $efile (@exonfiles) { + chomp($efile); + my @leftexons; + my @rightexons; + my ($dir,$pair,@etc) = split(/\/|\./,$efile); + open EFILE, "<$efile" or die $!; + my $header = <EFILE>; + while (my $line = <EFILE>) { + my ($leftgene,$rightgene,$lefttrx,$righttrx, + $leftstrand,$rightstrand,$exonsrc,$exonnum, + $exon_chr,$exon_start,$exon_end) = split(/,/,$line); + if ($exonsrc =~ m/5/) { + push @leftexons, $exonnum; + }else { + push @rightexons, $exonnum; + } + } + if ($leftexons[0] eq $leftexons[-1]) { + $exonnuminfo{$dir}{leftgene} = $leftexons[0]; + }else { + $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]); + } + if ($rightexons[0] eq $rightexons[-1]) { + $exonnuminfo{$dir}{rightgene} = $rightexons[0]; + }else { + $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]); + } +} + +open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; + +print OAS join("\t","FusionName","LeftGene","LeftBreakpoint", + "LeftGeneExons","LeftStrand","RightGene","RightBreakpoint", + "RightGeneExons","RightStrand","RNAReads","DNAReads", + "FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n"; + +my $sname = $opt{prefix}; + +open FUSION, "<$opt{fusion}" or die $!; +my $header = <FUSION>; +chomp($header); +$header =~ s/^#//; +my @hline = split(/\t/,$header); +while (my $line = <FUSION>) { + chomp($line); + my @row = split(/\t/,$line); + my %hash; + foreach my $i (0..$#row) { + $hash{$hline[$i]} = $row[$i]; + } + my @filter; + my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint}); + my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint}); + $hash{LeftBreakpoint} = join(":",$left_chr,$left_pos); + $hash{RightBreakpoint} = join(":",$right_chr,$right_pos); + $hash{LeftStrand} = $left_strand; + $hash{RightStrand} = $right_strand; + $hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0]; + $hash{RightGene} = (split(/\^/,$hash{RightGene}))[0]; + unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}) { + push @filter, 'OutsideGeneList'; + } + $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; + my $fname = join("--",$hash{LeftGene},$hash{RightGene}); + my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); + my $key = join("_",$hash{LeftGene},$left_pos,$hash{RightGene},$right_pos); + my ($leftexon,$rightexon); + if ($exonnuminfo{$key}) { + $leftexon = $exonnuminfo{$key}{leftgene}; + $rightexon = $exonnuminfo{$key}{rightgene}; + } + $hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME'); + my ($dna_support,$rna_support)=("no") x 2; + $hash{annots} =~ s/CHROMOSOMAL\[/CHROMOSOMAL /; + $hash{annots} =~ s/\]|\[|\"//g; + @annots = split(/,/,$hash{annots}); + my ($chrom_type_dist) = grep(/CHROMOSOMAL/,@annots); + my ($chrtype,$chrdist) = split(/ /,$chrom_type_dist); + @annot2 = grep(!/CHROMOSOMAL/, @annots); + $fusion_annot = ''; + if (scalar(@annot2) > 0) { + $fusion_annot = join(",",@annot2); + } + if ($known{$fname2} || $fusion_annot =~ m/CCLE|Cosmic|FA_CancerSupp|Klijn_CellLines|Mitelman|YOSHIHARA_TCGA|chimer/i) { + push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 3); + }else { + push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 5); + } + $rna_support = "yes"; + if ($left_chr eq $right_chr) { + $diff = abs($right_pos-$left_pos); + push @filter, 'ReadThrough' if ($diff < 200000); + } + my $qc ='PASS'; + if (scalar(@filter) > 0) { + $qc = join(";","FailedQC",@filter); + } + + print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, + $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, + $hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$fusion_annot,$qc,$chrtype,$chrdist),"\n"; +} + + +close OAS; diff --git a/alignment/filter_viral_idxstats.pl b/alignment/filter_viral_idxstats.pl old mode 100644 new mode 100755 diff --git a/alignment/hisat_genotype.sh b/alignment/hisat_genotype.sh old mode 100644 new mode 100755 diff --git a/alignment/indexbams.sh b/alignment/indexbams.sh old mode 100644 new mode 100755 diff --git a/alignment/markdups.sh b/alignment/markdups.sh old mode 100644 new mode 100755 diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh old mode 100644 new mode 100755 diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh old mode 100644 new mode 100755 index 02e3f40..a627763 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -57,17 +57,22 @@ then module load singularity/3.0.2 export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images" cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh + cut -f 8,10 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed + bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt + if [[ $filter == 1 ]] + then + perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt + fi else export PYENSEMBL_CACHE_DIR=/opt refgeno=${index_path}/CTAT_resource_lib STAR-Fusion --genome_lib_dir ${refgeno} --min_sum_frags 3 --CPU $NPROC --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt cut -f 7-10 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "agfusion annotate -db agfusion.homo_sapiens.95.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh -fi - -if [[ $filter == 1 ]] -then cut -f 8,10 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt - perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt + if [[ $filter == 1 ]] + then + perl $baseDir/filter_genefusions_docker.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt + fi fi diff --git a/alignment/virusalign.sh b/alignment/virusalign.sh old mode 100644 new mode 100755 -- GitLab