From fb5fcff844914394ddf40174050d3bda71439ea1 Mon Sep 17 00:00:00 2001 From: Erika Villa <Erika.Villa@utsouthwestern.edu> Date: Fri, 6 Jul 2018 09:41:31 -0500 Subject: [PATCH] cbioportal --- alignment/filter_genefusions.pl | 12 ++++++------ genect_rnaseq/cBioPortal_documents.pl | 12 ++++++------ variants/filter_cnvkit.pl | 6 +++++- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index cc6481c..0786670 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -5,7 +5,6 @@ use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); my %opt = (); my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h'); - my %entrez; open ENT, "</project/shared/bicf_workflow_ref/gene_info.human.txt" or die $!; my $headline = <ENT>; @@ -36,7 +35,7 @@ print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint", print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", "Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; -my $sname = (split(/_DNA_panel1385/,$opt{prefix}))[0]; +my $sname = $opt{prefix}; open FUSION, "<$opt{fusion}" or die $!; my $header = <FUSION>; @@ -62,15 +61,16 @@ while (my $line = <FUSION>) { $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; my $fname = join("--",$hash{LeftGene},$hash{RightGene}); my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); - my $ename = join("--",$entrez{$hash{LeftGene}},$entrez{$hash{RightGene}}); - my ($dna_support,$rna_support)=("no","no"); + my ($dna_support,$rna_support)=("no") x 2; if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) { $rna_support = "yes"; print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene}, $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand}, $hash{RightStrand},$hash{SumRNAReads},0),"\n"; - print OUTIR join("\t",$fname,$ename,"UTSW NGS Clinical Sequencing Lab",$sname,$fname." fusion", - 0,$rna_support,"STAR 2.5.2b","N/A"),"\n"; + print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", + $dna_support,$rna_support,"STAR Fusion","N/A"),"\n"; + print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", + $dna_support,$rna_support,"STAR Fusion","N/A"),"\n"; } } diff --git a/genect_rnaseq/cBioPortal_documents.pl b/genect_rnaseq/cBioPortal_documents.pl index 7f1fe0c..fd0a8ba 100644 --- a/genect_rnaseq/cBioPortal_documents.pl +++ b/genect_rnaseq/cBioPortal_documents.pl @@ -35,28 +35,28 @@ close ENT_ENS; if($opt{fpkm}){ open FPKM, "<$opt{fpkm}" or die $!; - open OUTF, ">$opt{prefix}\.data_expression_median_fpkm.txt" or die $!; + open OUTF, ">$opt{prefix}\.data_fpkm.cbioportal.txt" or die $!; print OUTF join("\t","Entrez_Gene_Id",$opt{prefix}),"\n"; my %fpkm; my $fpkm_header = <FPKM>; while(my $line = <FPKM>){ chomp $line; - my @row = split(/\t/,$line); - my $ensembl = (split(/\./,$row[0]))[0]; + my ($id,$gene,$ref,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line); + my $ensembl = (split(/\./,$id))[0]; if ($entrez{$ensembl}) { $entrezid = $entrez{$ensembl}; }else { - $entrezid = $entrez{$row[1]}; + $entrezid = $entrez{$gene}; } next unless ($entrezid); - print OUTF join("\t",$entrezid,$row[7]),"\n"; + print OUTF join("\t",$entrezid,$fpkm),"\n"; } close OUTF; } if($opt{logcpm}){ open IN, "<$opt{logcpm}" or die $!; - open OUTL, ">$opt{prefix}\.cBioPortal.logCPM.txt" or die $!; + open OUTL, ">$opt{prefix}\.data_logCPM.cbioportal.txt" or die $!; print OUTL join("\t","Entrez_Gene_Id",$opt{prefix}),"\n"; $fname = basename($opt{logcpm}); my $sample = (split(/\./,$fname))[0]; diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index 596ec22..ddcf170 100644 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -30,9 +30,11 @@ my $file = shift @ARGV; my $prefix = (split(/\./,(split(/\//,$file))[0]))[0]; open OUT, ">$prefix\.cnvcalls.txt" or die $!; -open BIO, ">$prefix\.data_cna_cbioportal.txt" or die $!; +open BIO, ">$prefix\.data_cna_discrete.cbioportal.txt" or die $!; +open BIO2, ">$prefix\.data_cna_continuous.cbioportal.txt" or die $!; print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n"; print BIO join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n"; +print BIO2 join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n"; open IN, "<$file" or die $!; my $header = <IN>; @@ -56,9 +58,11 @@ while (my $line = <IN>) { $cn_cbio = $cn -2; $cn_cbio = 2 if ($cn > 4); print BIO join("\t",$gene,$entrez{$gene},$cn_cbio),"\n"; + print BIO2 join("\t",$gene,$entrez{$gene},$log2),"\n"; print OUT join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight),"\n"; } } close IN; close OUT; close BIO; +close BIO2; -- GitLab