From 0cb2a2fd8daa374dc95199d6c7155b3aabe6ea91 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Fri, 9 Nov 2018 08:12:09 -0600
Subject: [PATCH] updates to RNASEQ

---
 alignment/filter_genefusions.pl | 36 +++++++++++++++++++++++++++++++--
 alignment/rnaseqalign.sh        |  2 +-
 alignment/starfusion.sh         | 15 +++++++++-----
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl
index 0786670..73ff3bd 100755
--- a/alignment/filter_genefusions.pl
+++ b/alignment/filter_genefusions.pl
@@ -20,18 +20,41 @@ while (my $line = <OM>) {
     $known{$line} = 1;
 }
 close OM;
-open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1385.genelist.txt" or die $!;
+open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!;
 while (my $line = <OM>) {
     chomp($line);
     $keep{$line} = 1;
 }
 
+my @exonfiles = `ls */*.exons.txt`;
+foreach $efile (@exonfiles) {
+    chomp($efile);
+    my ($dir,$pair,@etc) = split(/\/|\./,$efile);
+    open EFILE, "<$efile" or die $!;
+    my $header = <EFILE>;
+    while (my $line = <EFILE>) {
+	my ($leftgene,$rightgene,$lefttrx,$righttrx,$exonsrc,
+	    $exonnum,$exon_chr,$exon_start,$exon_end) = split(/\t/,$line);
+	if ($exonsrc =~ m/5/) {
+	    push @leftexons, $exonnum;
+	}else {
+	    push @rightexons, $exonnum;
+	}
+    }
+    $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]);
+    $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]);
+}
+
 open OUT, ">$opt{prefix}\.translocations.txt" or die $!;
+open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!;
 open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!;
 
 print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint",
 	       "RightBreakpoint","LeftStrand","RightStrand","RNAReads",
 	       "DNAReads"),"\n";
+print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand",
+	       "RightGene","RightBreakpoint","RightGeneExons","RightStrand","RNAReads","DNAReads"),"\n";
+
 print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode",
                "Fusion","DNA_support","RNA_support","Method","Frame"),"\n";
 
@@ -61,15 +84,24 @@ while (my $line = <FUSION>) {
   $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount};
   my $fname = join("--",$hash{LeftGene},$hash{RightGene});
   my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene});
+  my $key = join("_",$hash{LeftGene},$left_pos,$hash{RightGene},$right_pos);
+  my ($leftexon,$rightexon);
+  if ($exonnuminfo{$key}) {
+      $leftexon = $exonnuminfo{$key}{leftgene};
+      $rightexon = $exonnuminfo{$key}{rightgene};
+  }
   my ($dna_support,$rna_support)=("no") x 2;
   if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) {
     $rna_support = "yes";
     print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene},
 		   $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand},
 		   $hash{RightStrand},$hash{SumRNAReads},0),"\n";
+    print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
+		   $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
+		   $hash{SumRNAReads},0,$hash{PROT_FUSION_TYPE},$hash{annots}),"\n";
     print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion",
 		     $dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
-   print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
+    print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
                      $dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
   }
 }
diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh
index 8feea1b..4ef4299 100644
--- a/alignment/rnaseqalign.sh
+++ b/alignment/rnaseqalign.sh
@@ -63,7 +63,7 @@ else
     else
 	hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt
     fi
-    if [[ $umi==1 ]]
+    if [[ $umi == 1 ]]
     then
 	python ${baseDir}/add_umi_sam.py -s out.sam -o output.bam
     else
diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh
index 27937e8..619569c 100644
--- a/alignment/starfusion.sh
+++ b/alignment/starfusion.sh
@@ -51,19 +51,24 @@ then
     export TMP_HOME=$tmphome
     index_path=${refgeno}/CTAT_lib_trinity/
     trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion
-    cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt
-    cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.coding_effect.txt
+    #cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt
+    cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt
 else
     module add star/2.5.2b
     index_path=${refgeno}/CTAT_lib/
     STAR-Fusion --genome_lib_dir ${index_path} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err
     cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt
 fi
-if [[ $filter==1 ]]
+
+module load singularity/2.6.0
+export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images"
+cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate  -db  /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
+
+if [[ $filter == 1 ]]
 then
     cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed
     bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt
-    #cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint|perl -pe 's/:/\t/g' |awk '{print $1"\t"$2"\t"$4"\t"$5"\tAVG"}' > coords.txt
-    #java -Xmx1G -jar /project/shared/bicf_workflow_ref/seqprg/oncofuse-1.1.1/Oncofuse.jar -a hg38 coords.txt coord AVG oncofuse.out
     perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt
 fi
+
+
-- 
GitLab