update gene fusion 2 answer script

d8b85458 · Brandi Cantarel · 31faa289 · d8b85458 · d8b85458
Commit d8b85458 authored 5 years ago by Brandi Cantarel
--- a/alignment/filter_genefusions.pl
+++ b/alignment/filter_genefusions.pl
@@ -14,12 +14,14 @@ while (my $line = <ENT>) {
      $entrez{$row[2]} = $row[1];
  }
 }
 open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!;
 while (my $line = <OM>) {
    chomp($line);
    $known{$line} = 1;
 }
 close OM;
 open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!;
 while (my $line = <OM>) {
    chomp($line);
@@ -43,20 +45,24 @@ foreach $efile (@exonfiles) {
 	    push @rightexons, $exonnum;
 	}
    }
-    $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]);
+    if ($leftexons[0] eq  $leftexons[-1]) {
-    $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]);
+	$exonnuminfo{$dir}{leftgene} = $leftexons[0];
+    }else {
+	$exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]);
+    }
+    if ($rightexons[0] eq  $rightexons[-1]) {
+	$exonnuminfo{$dir}{rightgene} = $rightexons[0];
+    }else {
+	$exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]);
+    }
 }
-open OUT, ">$opt{prefix}\.translocations.txt" or die $!;
 open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!;
 open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!;
-print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint",
-	       "RightBreakpoint","LeftStrand","RightStrand","RNAReads",
-	       "DNAReads"),"\n";
 print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand",
 	       "RightGene","RightBreakpoint","RightGeneExons","RightStrand",
-	       "RNAReads","DNAReads","FusionType","Annot"),"\n";
+	       "RNAReads","DNAReads","FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n";
 print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode",
               "Fusion","DNA_support","RNA_support","Method","Frame"),"\n";
@@ -75,6 +81,7 @@ while (my $line = <FUSION>) {
  foreach my $i (0..$#row) {
    $hash{$hline[$i]} = $row[$i];
  }
+  my @filter;
  my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint});
  my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint});
  $hash{LeftBreakpoint} = join(":",$left_chr,$left_pos);
@@ -83,7 +90,9 @@ while (my $line = <FUSION>) {
  $hash{RightStrand} = $right_strand;
  $hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0];
  $hash{RightGene} = (split(/\^/,$hash{RightGene}))[0];
-  next unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}});
+  unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}) {
+      push @filter, 'OutsideGeneList';
+  }
  $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount};
  my $fname = join("--",$hash{LeftGene},$hash{RightGene});
  my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene});
@@ -95,20 +104,40 @@ while (my $line = <FUSION>) {
  }
  $hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME');
  my ($dna_support,$rna_support)=("no") x 2;
-  if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) {
+  $hash{annots} =~ s/CHROMOSOMAL\[/CHROMOSOMAL /;
-    $rna_support = "yes";
+  $hash{annots} =~ s/\]|\[|\"//g;
-    print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene},
+  @annots = split(/,/,$hash{annots});
-		   $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand},
+  my ($chrom_type_dist) = grep(/CHROMOSOMAL/,@annots);
-		   $hash{RightStrand},$hash{SumRNAReads},0),"\n";
+  my ($chrtype,$chrdist) = split(/ /,$chrom_type_dist);
-    print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
+  @annot2 = grep(!/CHROMOSOMAL/, @annots);
-		   $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
+  $fusion_annot = '';
-		   $hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$hash{annots}),"\n";
+  if (scalar(@annot2) > 0) {
-    print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion",
+      $fusion_annot = join(",",@annot2);
-		     $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
-    print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
-                     $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
  }
+  if ($known{$fname2} || $fusion_annot =~ m/CCLE|Cosmic|FA_CancerSupp|Klijn_CellLines|Mitelman|YOSHIHARA_TCGA|chimer/i) {
+      push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 3);
+  }else {
+      push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 5);
+  }
+  $rna_support = "yes";
+  if ($left_chr eq $right_chr) {
+      $diff = abs($right_pos-$left_pos);
+      push @filter, 'ReadThrough' if ($diff < 200000);
+  }
+  my $qc ='PASS';
+  if (scalar(@filter) > 0) {
+      $qc = join(";","FailedQC",@filter);
+  }
+  print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
+		 $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
+		 $hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$fusion_annot,$qc,$chrtype,$chrdist),"\n";
+  print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion",
+		   $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
+  print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
+		   $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
 }
-close OUT;
+close OAS;
 close OUTIR;
--- a/genect_rnaseq/cBioPortal_documents.pl
+++ b/genect_rnaseq/cBioPortal_documents.pl
@@ -6,15 +6,17 @@ use File::Basename;
 my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h');
-open ENT_ENS, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!;
+open ENT_GENE, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!;
 my %entrez;
-my $ent_header = <ENT_ENS>;
+my %entgene;
-while (my $line = <ENT_ENS>){
+my $ent_header = <ENT_GENE>;
+while (my $line = <ENT_GENE>){
  chomp $line;
  my @row = split(/\t/, $line);
-  $entrez{$row[2]}=$row[1];
+  $entgene{$row[6]}{$row[2]}=$row[1];
+  #$entrez{$row[2]}=$row[1];
 }
-close ENT_ENS;
+close ENT_GENE;
 open ENT_ENS, "</project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt" or die $!;
 my $gn_header = <ENT_ENS>;
 my %ensym;
@@ -41,12 +43,14 @@ if($opt{fpkm}){
  my $fpkm_header = <FPKM>;
  while(my $line = <FPKM>){
    chomp $line;
-    my ($id,$gene,$ref,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line);
+	my $entrezid=0;
+    my ($id,$gene,$chr,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line);
+	$chr =~ s/^chr//g;
    my $ensembl = (split(/\./,$id))[0];
    if ($entrez{$ensembl}) {
      $entrezid = $entrez{$ensembl};
-    }else {
+    }elsif($entgene{$chr}{$gene}){
-      $entrezid = $entrez{$gene};
+      $entrezid = $entgene{$chr}{$gene};
    }
    next unless ($entrezid);
    print OUTF join("\t",$entrezid,$fpkm),"\n"; 
@@ -68,19 +72,24 @@ if($opt{logcpm}){
    chomp($line);
    my @row = split(/\t/,$line);
    my $gene = $row[0];
+	my $chrom = (split(";",$row[1]))[0];
+	$chrom =~ s/^chr//g;
    my $ct = $row[-1];
    next if($gene =~ m/^__/);
-    $cts{$gene}{$sample} = $ct;
+    $cts{$chrom}{$gene} = $ct;
    $total += $ct;
  }
+	print $total."\n";
  close IN;
-  foreach $ens (keys %cts) {
+  foreach my $ens_chr (keys %cts) {
-    next unless $entrez{$ens};
+	foreach my $ens (keys $cts{$ens_chr}){
-    unless ($cts{$ens}) {
+    	next unless $entgene{$ens_chr}{$ens};#($entrez{$ens} or $entgene{$gene}{$chrom});
-      $cts{$ens} = 0;
+    	#unless ($cts{$ens_chr}{$ens}){
-    }
+    	#  $cts{$ens_chr}{$ens} = 0;
-    $cpm = ($cts{$ens}/$total)*1e6;
+    	#}
-    print OUTL join("\t",$entrez{$ens},sprintf("%.2f",log2($cpm))),"\n";
+    	$cpm = ($cts{$ens_chr}{$ens}/$total)*1e6;
+    	print OUTL join("\t",$entgene{$ens_chr}{$ens},sprintf("%.2f",log2($cpm))),"\n";
+	}
  }
  close OUTL;
 }