From fb5fcff844914394ddf40174050d3bda71439ea1 Mon Sep 17 00:00:00 2001
From: Erika Villa <Erika.Villa@utsouthwestern.edu>
Date: Fri, 6 Jul 2018 09:41:31 -0500
Subject: [PATCH] cbioportal: rename outputs, per-gene fusion rows, continuous CNA

---
 alignment/filter_genefusions.pl       | 12 ++++++------
 genect_rnaseq/cBioPortal_documents.pl | 12 ++++++------
 variants/filter_cnvkit.pl             |  6 +++++-
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl
index cc6481c..0786670 100755
--- a/alignment/filter_genefusions.pl
+++ b/alignment/filter_genefusions.pl
@@ -5,7 +5,6 @@ use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
 
 my %opt = ();
 my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h');
-
 my %entrez;
 open ENT, "</project/shared/bicf_workflow_ref/gene_info.human.txt" or die $!;
 my $headline = <ENT>;
@@ -36,7 +35,7 @@ print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint",
 print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode",
                "Fusion","DNA_support","RNA_support","Method","Frame"),"\n";
 
-my $sname = (split(/_DNA_panel1385/,$opt{prefix}))[0];
+my $sname = $opt{prefix}; # sample ID printed in the Tumor_Sample_Barcode column of OUTIR
 
 open FUSION, "<$opt{fusion}" or die $!;
 my $header = <FUSION>;
@@ -62,15 +61,16 @@ while (my $line = <FUSION>) {
   $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount};
   my $fname = join("--",$hash{LeftGene},$hash{RightGene});
   my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene});
-  my $ename = join("--",$entrez{$hash{LeftGene}},$entrez{$hash{RightGene}});
-  my ($dna_support,$rna_support)=("no","no");
+  my ($dna_support,$rna_support)=("no") x 2; # both flags start as "no"; only $rna_support is flipped to "yes" in the branch below
   if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) {
     $rna_support = "yes";
     print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene},
 		   $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand},
 		   $hash{RightStrand},$hash{SumRNAReads},0),"\n";
-    print OUTIR join("\t",$fname,$ename,"UTSW NGS Clinical Sequencing Lab",$sname,$fname." fusion",
-		     0,$rna_support,"STAR 2.5.2b","N/A"),"\n";
+    for my $gene ($hash{LeftGene},$hash{RightGene}) { # one OUTIR row per fusion partner, left gene first
+      print OUTIR join("\t",$gene,$entrez{$gene},"UTSW",$sname,$fname." fusion",
+                       $dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
+    }
   }
 }
 
diff --git a/genect_rnaseq/cBioPortal_documents.pl b/genect_rnaseq/cBioPortal_documents.pl
index 7f1fe0c..fd0a8ba 100644
--- a/genect_rnaseq/cBioPortal_documents.pl
+++ b/genect_rnaseq/cBioPortal_documents.pl
@@ -35,28 +35,28 @@ close ENT_ENS;
 
 if($opt{fpkm}){
   open FPKM, "<$opt{fpkm}" or die $!;
-  open OUTF, ">$opt{prefix}\.data_expression_median_fpkm.txt" or die $!;
+  open OUTF, ">$opt{prefix}\.data_fpkm.cbioportal.txt" or die $!;
   print OUTF join("\t","Entrez_Gene_Id",$opt{prefix}),"\n";
   my %fpkm;
   my $fpkm_header = <FPKM>;
   while(my $line = <FPKM>){
     chomp $line;
-    my @row = split(/\t/,$line);
-    my $ensembl = (split(/\./,$row[0]))[0];
+    my ($id,$gene,$ref,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line); # name all nine tab-separated columns; only $id, $gene and $fpkm are read in this loop
+    my $ensembl = (split(/\./,$id))[0]; # keep the part of $id before the first "." for the %entrez lookup
     if ($entrez{$ensembl}) {
       $entrezid = $entrez{$ensembl};
     }else {
-      $entrezid = $entrez{$row[1]};
+      $entrezid = $entrez{$gene};
     }
     next unless ($entrezid);
-    print OUTF join("\t",$entrezid,$row[7]),"\n"; 
+    print OUTF join("\t",$entrezid,$fpkm),"\n"; 
   }
   close OUTF;
 }
 
 if($opt{logcpm}){
   open IN, "<$opt{logcpm}" or die $!;
-  open OUTL, ">$opt{prefix}\.cBioPortal.logCPM.txt" or die $!;
+  open OUTL, ">$opt{prefix}\.data_logCPM.cbioportal.txt" or die $!;
   print OUTL join("\t","Entrez_Gene_Id",$opt{prefix}),"\n";
   $fname = basename($opt{logcpm});
   my $sample = (split(/\./,$fname))[0];
diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl
index 596ec22..ddcf170 100644
--- a/variants/filter_cnvkit.pl
+++ b/variants/filter_cnvkit.pl
@@ -30,9 +30,11 @@ my $file = shift @ARGV;
 my $prefix = (split(/\./,(split(/\//,$file))[0]))[0];
 
 open OUT, ">$prefix\.cnvcalls.txt" or die $!;
-open BIO, ">$prefix\.data_cna_cbioportal.txt" or die $!;
+open BIO, ">$prefix\.data_cna_discrete.cbioportal.txt" or die $!; # receives $cn_cbio per gene
+open BIO2, ">$prefix\.data_cna_continuous.cbioportal.txt" or die $!; # receives $log2 per gene
 print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n";
 print BIO join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n";
+print BIO2 join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n";
 
 open IN, "<$file" or die $!;
 my $header = <IN>;
@@ -56,9 +58,11 @@ while (my $line = <IN>) {
 	$cn_cbio = $cn -2;
 	$cn_cbio = 2 if ($cn > 4);
 	print BIO join("\t",$gene,$entrez{$gene},$cn_cbio),"\n";
+        print BIO2 join("\t",$gene,$entrez{$gene},$log2),"\n"; # same row layout as BIO, with $log2 in place of $cn_cbio; NOTE(review): $log2 is assigned outside this hunk - confirm it is set for every row
 	print OUT join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight),"\n";
     }
 }
 close IN;
 close OUT;
 close BIO;
+close BIO2;
-- 
GitLab