Skip to content
Snippets Groups Projects
Commit d8b85458 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

update gene fusion 2 answer script

parent 31faa289
Branches
Tags
No related merge requests found
...@@ -14,12 +14,14 @@ while (my $line = <ENT>) { ...@@ -14,12 +14,14 @@ while (my $line = <ENT>) {
$entrez{$row[2]} = $row[1]; $entrez{$row[2]} = $row[1];
} }
} }
open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!; open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!;
while (my $line = <OM>) { while (my $line = <OM>) {
chomp($line); chomp($line);
$known{$line} = 1; $known{$line} = 1;
} }
close OM; close OM;
open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!;
while (my $line = <OM>) { while (my $line = <OM>) {
chomp($line); chomp($line);
...@@ -43,20 +45,24 @@ foreach $efile (@exonfiles) { ...@@ -43,20 +45,24 @@ foreach $efile (@exonfiles) {
push @rightexons, $exonnum; push @rightexons, $exonnum;
} }
} }
$exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]); if ($leftexons[0] eq $leftexons[-1]) {
$exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]); $exonnuminfo{$dir}{leftgene} = $leftexons[0];
}else {
$exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]);
}
if ($rightexons[0] eq $rightexons[-1]) {
$exonnuminfo{$dir}{rightgene} = $rightexons[0];
}else {
$exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]);
}
} }
open OUT, ">$opt{prefix}\.translocations.txt" or die $!;
open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!;
open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!; open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!;
print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint",
"RightBreakpoint","LeftStrand","RightStrand","RNAReads",
"DNAReads"),"\n";
print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand", print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand",
"RightGene","RightBreakpoint","RightGeneExons","RightStrand", "RightGene","RightBreakpoint","RightGeneExons","RightStrand",
"RNAReads","DNAReads","FusionType","Annot"),"\n"; "RNAReads","DNAReads","FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n";
print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode",
"Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; "Fusion","DNA_support","RNA_support","Method","Frame"),"\n";
...@@ -75,6 +81,7 @@ while (my $line = <FUSION>) { ...@@ -75,6 +81,7 @@ while (my $line = <FUSION>) {
foreach my $i (0..$#row) { foreach my $i (0..$#row) {
$hash{$hline[$i]} = $row[$i]; $hash{$hline[$i]} = $row[$i];
} }
my @filter;
my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint}); my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint});
my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint}); my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint});
$hash{LeftBreakpoint} = join(":",$left_chr,$left_pos); $hash{LeftBreakpoint} = join(":",$left_chr,$left_pos);
...@@ -83,7 +90,9 @@ while (my $line = <FUSION>) { ...@@ -83,7 +90,9 @@ while (my $line = <FUSION>) {
$hash{RightStrand} = $right_strand; $hash{RightStrand} = $right_strand;
$hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0]; $hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0];
$hash{RightGene} = (split(/\^/,$hash{RightGene}))[0]; $hash{RightGene} = (split(/\^/,$hash{RightGene}))[0];
next unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}); unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}) {
push @filter, 'OutsideGeneList';
}
$hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount};
my $fname = join("--",$hash{LeftGene},$hash{RightGene}); my $fname = join("--",$hash{LeftGene},$hash{RightGene});
my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene});
...@@ -95,20 +104,40 @@ while (my $line = <FUSION>) { ...@@ -95,20 +104,40 @@ while (my $line = <FUSION>) {
} }
$hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME'); $hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME');
my ($dna_support,$rna_support)=("no") x 2; my ($dna_support,$rna_support)=("no") x 2;
if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) { $hash{annots} =~ s/CHROMOSOMAL\[/CHROMOSOMAL /;
$rna_support = "yes"; $hash{annots} =~ s/\]|\[|\"//g;
print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene}, @annots = split(/,/,$hash{annots});
$hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand}, my ($chrom_type_dist) = grep(/CHROMOSOMAL/,@annots);
$hash{RightStrand},$hash{SumRNAReads},0),"\n"; my ($chrtype,$chrdist) = split(/ /,$chrom_type_dist);
print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, @annot2 = grep(!/CHROMOSOMAL/, @annots);
$hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, $fusion_annot = '';
$hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$hash{annots}),"\n"; if (scalar(@annot2) > 0) {
print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", $fusion_annot = join(",",@annot2);
$dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
} }
if ($known{$fname2} || $fusion_annot =~ m/CCLE|Cosmic|FA_CancerSupp|Klijn_CellLines|Mitelman|YOSHIHARA_TCGA|chimer/i) {
push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 3);
}else {
push @filter, 'LowReadCt' if ($hash{SumRNAReads} < 5);
}
$rna_support = "yes";
if ($left_chr eq $right_chr) {
$diff = abs($right_pos-$left_pos);
push @filter, 'ReadThrough' if ($diff < 200000);
}
my $qc ='PASS';
if (scalar(@filter) > 0) {
$qc = join(";","FailedQC",@filter);
}
print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
$hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
$hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$fusion_annot,$qc,$chrtype,$chrdist),"\n";
print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
} }
close OUT;
close OAS;
close OUTIR; close OUTIR;
...@@ -6,15 +6,17 @@ use File::Basename; ...@@ -6,15 +6,17 @@ use File::Basename;
my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h'); my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h');
open ENT_ENS, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!; open ENT_GENE, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!;
my %entrez; my %entrez;
my $ent_header = <ENT_ENS>; my %entgene;
while (my $line = <ENT_ENS>){ my $ent_header = <ENT_GENE>;
while (my $line = <ENT_GENE>){
chomp $line; chomp $line;
my @row = split(/\t/, $line); my @row = split(/\t/, $line);
$entrez{$row[2]}=$row[1]; $entgene{$row[6]}{$row[2]}=$row[1];
#$entrez{$row[2]}=$row[1];
} }
close ENT_ENS; close ENT_GENE;
open ENT_ENS, "</project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt" or die $!; open ENT_ENS, "</project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt" or die $!;
my $gn_header = <ENT_ENS>; my $gn_header = <ENT_ENS>;
my %ensym; my %ensym;
...@@ -41,12 +43,14 @@ if($opt{fpkm}){ ...@@ -41,12 +43,14 @@ if($opt{fpkm}){
my $fpkm_header = <FPKM>; my $fpkm_header = <FPKM>;
while(my $line = <FPKM>){ while(my $line = <FPKM>){
chomp $line; chomp $line;
my ($id,$gene,$ref,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line); my $entrezid=0;
my ($id,$gene,$chr,$strand,$start,$end,$coverage,$fpkm,$tpm) = split(/\t/,$line);
$chr =~ s/^chr//g;
my $ensembl = (split(/\./,$id))[0]; my $ensembl = (split(/\./,$id))[0];
if ($entrez{$ensembl}) { if ($entrez{$ensembl}) {
$entrezid = $entrez{$ensembl}; $entrezid = $entrez{$ensembl};
}else { }elsif($entgene{$chr}{$gene}){
$entrezid = $entrez{$gene}; $entrezid = $entgene{$chr}{$gene};
} }
next unless ($entrezid); next unless ($entrezid);
print OUTF join("\t",$entrezid,$fpkm),"\n"; print OUTF join("\t",$entrezid,$fpkm),"\n";
...@@ -68,19 +72,24 @@ if($opt{logcpm}){ ...@@ -68,19 +72,24 @@ if($opt{logcpm}){
chomp($line); chomp($line);
my @row = split(/\t/,$line); my @row = split(/\t/,$line);
my $gene = $row[0]; my $gene = $row[0];
my $chrom = (split(";",$row[1]))[0];
$chrom =~ s/^chr//g;
my $ct = $row[-1]; my $ct = $row[-1];
next if($gene =~ m/^__/); next if($gene =~ m/^__/);
$cts{$gene}{$sample} = $ct; $cts{$chrom}{$gene} = $ct;
$total += $ct; $total += $ct;
} }
print $total."\n";
close IN; close IN;
foreach $ens (keys %cts) { foreach my $ens_chr (keys %cts) {
next unless $entrez{$ens}; foreach my $ens (keys $cts{$ens_chr}){
unless ($cts{$ens}) { next unless $entgene{$ens_chr}{$ens};#($entrez{$ens} or $entgene{$gene}{$chrom});
$cts{$ens} = 0; #unless ($cts{$ens_chr}{$ens}){
} # $cts{$ens_chr}{$ens} = 0;
$cpm = ($cts{$ens}/$total)*1e6; #}
print OUTL join("\t",$entrez{$ens},sprintf("%.2f",log2($cpm))),"\n"; $cpm = ($cts{$ens_chr}{$ens}/$total)*1e6;
print OUTL join("\t",$entgene{$ens_chr}{$ens},sprintf("%.2f",log2($cpm))),"\n";
}
} }
close OUTL; close OUTL;
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment