diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index 1c43cd37dc9f9d66b2317a7626756454d96754e3..c7097db2ec82487ef53d66b3e82647673b1ee592 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -14,12 +14,14 @@ while (my $line = <ENT>) { $entrez{$row[2]} = $row[1]; } } + open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!; while (my $line = <OM>) { chomp($line); $known{$line} = 1; } close OM; + open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; while (my $line = <OM>) { chomp($line); @@ -43,20 +45,24 @@ foreach $efile (@exonfiles) { push @rightexons, $exonnum; } } - $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]); - $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]); + if ($leftexons[0] eq $leftexons[-1]) { + $exonnuminfo{$dir}{leftgene} = $leftexons[0]; + }else { + $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]); + } + if ($rightexons[0] eq $rightexons[-1]) { + $exonnuminfo{$dir}{rightgene} = $rightexons[0]; + }else { + $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]); + } } -open OUT, ">$opt{prefix}\.translocations.txt" or die $!; open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!; -print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint", - "RightBreakpoint","LeftStrand","RightStrand","RNAReads", - "DNAReads"),"\n"; print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand", "RightGene","RightBreakpoint","RightGeneExons","RightStrand", - "RNAReads","DNAReads","FusionType","Annot"),"\n"; + "RNAReads","DNAReads","FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n"; print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", "Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; @@ -75,6 +81,7 @@ while (my $line = <FUSION>) { foreach my $i (0..$#row) { $hash{$hline[$i]} = $row[$i]; } + my @filter; my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint}); my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint}); $hash{LeftBreakpoint} = join(":",$left_chr,$left_pos); @@ -83,7 +90,9 @@ while (my $line = <FUSION>) { $hash{RightStrand} = $right_strand; $hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0]; $hash{RightGene} = (split(/\^/,$hash{RightGene}))[0]; - next unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}); + unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}) { + push @filter, 'OutsideGeneList'; + } $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; my $fname = join("--",$hash{LeftGene},$hash{RightGene}); my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); @@ -95,20 +104,40 @@ while (my $line = <FUSION>) { } $hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME'); my ($dna_support,$rna_support)=("no") x 2; - if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) { - $rna_support = "yes"; - print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene}, - $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand}, - $hash{RightStrand},$hash{SumRNAReads},0),"\n"; - print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, - $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, - $hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$hash{annots}),"\n"; - print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", - $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n"; - print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", - $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n"; + $hash{annots} =~ s/CHROMOSOMAL\[/CHROMOSOMAL /; + $hash{annots} =~ s/\]|\[|\"//g; + @annots = split(/,/,$hash{annots}); + my ($chrom_type_dist) = grep(/CHROMOSOMAL/,@annots); + my ($chrtype,$chrdist) = split(/ /,$chrom_type_dist); + @annot2 = grep(!/CHROMOSOMAL/, @annots); + $fusion_annot = ''; + if (scalar(@annot2) > 0) { + $fusion_annot = join(",",@annot2); } + if ($known{$fname2} || $fusion_annot =~ m/CCLE|Cosmic|FA_CancerSupp|Klijn_CellLines|Mitelman|YOSHIHARA_TCGA|chimer/i) { + push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 3); + }else { + push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 5); + } + $rna_support = "yes"; + if ($left_chr eq $right_chr) { + $diff = abs($right_pos-$left_pos); + push @filter, 'ReadThrough' if ($diff < 200000); + } + my $qc ='PASS'; + if (scalar(@filter) > 0) { + $qc = join(";","FailedQC",@filter); + } + + print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, + $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, + $hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$fusion_annot,$qc,$chrtype,$chrdist),"\n"; + print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", + $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n"; + print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", + $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n"; } -close OUT; + +close OAS; close OUTIR; diff --git a/genect_rnaseq/cBioPortal_documents.pl b/genect_rnaseq/cBioPortal_documents.pl index 5b672a32055bc7dc7e54bed7b67170e371d1f4ed..31f6803d2e6dd0bde08ced0313be1b4f62910801 100644 --- a/genect_rnaseq/cBioPortal_documents.pl +++ b/genect_rnaseq/cBioPortal_documents.pl @@ -6,15 +6,17 @@ use File::Basename; my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h'); -open ENT_ENS, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!; +open ENT_GENE, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!; my %entrez; -my $ent_header = <ENT_ENS>; -while (my $line = <ENT_ENS>){ +my %entgene; +my $ent_header = <ENT_GENE>; +while (my $line = <ENT_GENE>){ chomp $line; my @row = split(/\t/, $line); - $entrez{$row[2]}=$row[1]; + $entgene{$row[6]}{$row[2]}=$row[1]; + #$entrez{$row[2]}=$row[1]; } -close ENT_ENS; +close ENT_GENE; open ENT_ENS, "</project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt" or die $!; my $gn_header = <ENT_ENS>; my %ensym; @@ -41,12 +43,14 @@ if($opt{fpkm}){ my $fpkm_header = <FPKM>; while(my $line = <FPKM>){ chomp $line; - my ($id,$gene,$ref,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line); + my $entrezid=0; + my ($id,$gene,$chr,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line); + $chr =~ s/^chr//g; my $ensembl = (split(/\./,$id))[0]; if ($entrez{$ensembl}) { $entrezid = $entrez{$ensembl}; - }else { - $entrezid = $entrez{$gene}; + }elsif($entgene{$chr}{$gene}){ + $entrezid = $entgene{$chr}{$gene}; } next unless ($entrezid); print OUTF join("\t",$entrezid,$fpkm),"\n"; @@ -68,19 +72,24 @@ if($opt{logcpm}){ chomp($line); my @row = split(/\t/,$line); my $gene = $row[0]; + my $chrom = (split(";",$row[1]))[0]; + $chrom =~ s/^chr//g; my $ct = $row[-1]; next if($gene =~ m/^__/); - $cts{$gene}{$sample} = $ct; + $cts{$chrom}{$gene} = $ct; $total += $ct; } + print $total."\n"; close IN; - foreach $ens (keys %cts) { - next unless $entrez{$ens}; - unless ($cts{$ens}) { - $cts{$ens} = 0; - } - $cpm = ($cts{$ens}/$total)*1e6; - print OUTL join("\t",$entrez{$ens},sprintf("%.2f",log2($cpm))),"\n"; + foreach my $ens_chr (keys %cts) { + foreach my $ens (keys $cts{$ens_chr}){ + next unless $entgene{$ens_chr}{$ens};#($entrez{$ens} or $entgene{$gene}{$chrom}); + #unless ($cts{$ens_chr}{$ens}){ + # $cts{$ens_chr}{$ens} = 0; + #} + $cpm = ($cts{$ens_chr}{$ens}/$total)*1e6; + print OUTL join("\t",$entgene{$ens_chr}{$ens},sprintf("%.2f",log2($cpm))),"\n"; + } } close OUTL; }