#!/usr/bin/perl -w #svanno.pl use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); my %opt = (); my $results = GetOptions (\%opt,'input|i=s','refdb|r=s','help|h'); my $vcf = $opt{input}; my $outfile = $vcf; $outfile =~ s/vcf/txt/g; open OUT, ">$outfile" or die $!; my $gffile = $vcf; $gffile =~ s/vcf/genefusion.txt/g; open GF, ">$gffile" or die $!; my %lines; my %eventid; my $ct = 0; open IN, "<$vcf" or die $!; while (my $line = <IN>) { chomp($line); if ($line =~ m/^#CHROM/) { my @header = split(/\t/,$line); ($chrom, $pos,$id,$ref,$alt,$score, $filter,$info,$format,@subjacc) = split(/\t/, $line); } next if $line =~ m/#/; my ($chrom, $pos,$id,$ref,$alt,$score, $filter,$annot,$format,@gts) = split(/\t/, $line); $ct ++; my %hash = (); foreach $a (split(/;/,$annot)) { my ($key,$val) = split(/=/,$a); $hash{$key} = $val unless ($hash{$key}); } if ($id eq 'N') { $id = 'NB'.sprintf("%06s",$ct); } my $evid = (split(/_/,$id))[0]; $hash{'END'} = $pos+1 unless $hash{'END'}; my ($allele,$effect,$impact,$gene,$geneid,$feature, $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, $cds_pos,$cds_len,$aapos,$aalen,$distance,$err); next unless $hash{ANN}; F1:foreach $trx (split(/,/,$hash{ANN})) { ($allele,$effect,$impact,$gene,$geneid,$feature, $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx); next unless ($impact && $impact =~ m/HIGH|MODERATE/); next if ($effect eq 'sequence_feature'); last F1; } next unless ($impact && $impact =~ m/HIGH|MODERATE/); next unless ($gene); next if ($done{$chrom}{$pos}); $done{$chrom}{$pos} = 1 @deschead = split(":",$format); F1:foreach $sample (@subjacc) { my $allele_info = shift @gts; @ainfo = split(/:/, $allele_info); my %gtinfo = (); foreach $k (0..$#deschead) { $gtinfo{$deschead[$k]} = $ainfo[$k]; } unless ($gtinfo{SU}) { $gtinfo{SU} = 0; $gtinfo{SU} = $gtinfo{RV}+$gtinfo{DV} if ($gtinfo{RV} && $gtinfo{DV}); } next if ($gtinfo{SU} < 10); if (lc($alt) =~ m/chr(\w+):(\d+)/i) { $chr2 = $1; $end = $2; if ($chr2 eq $chrom) { print OUT join("\t",$sample,$gene,$chrom,$pos,$end,$id,$hash{SVTYPE},$effect, $featureid,$codon,$aa,$rank,$gtinfo{SU}),"\n"; }elsif ($id =~ m/_\d+/ && $effect =~ m/gene_fusion/) { $gene =~ s/\&/--/; my ($left_gene,$right_gene) = split(/--/,$gene); next unless ($right_gene); my $left_breakpoint = join(":",$chrom,$pos); my $right_breakpoint = join(":",'chr'.$chr2,$end); my ($leftstrand,$rightstrand) = split(//,$hash{STRANDS}); print GF join("\t",$sample,$gene,$left_gene,$right_gene, $left_breakpoint, $right_breakpoint,$leftstrand,$rightstrand,'',$gtinfo{SU}),"\n"; }else { print OUT join("\t",$sample,$gene,$chrom,$pos,$end,$ id,$hash{SVTYPE},$effect, $featureid,$codon,$aa,$rank,$gtinfo{SU}),"\n"; } }else { if ($hash{CHR2} && $hash{CHR2} eq $chrom) { $end = $hash{END}; print OUT join("\t",$sample,$gene,$chrom,$pos,$end,$id,$hash{SVTYPE},$effect, $featureid,$codon,$aa,$rank,$gtinfo{SU}),"\n"; }elsif ($hash{CHR2} && $hash{CHR2} ne $chrom) { $gene =~ s/\&/--/; my ($left_gene,$right_gene) = split(/--/,$gene); my $left_breakpoint = join(":",$chrom,$pos); my $right_breakpoint = join(":",$hash{CHR2},$hash{END}); print GF join("\t",$sample,$gene,$left_gene,$right_gene, $left_breakpoint,$right_breakpoint,'','','',$gtinfo{SU}),"\n"; }unless ($hash{CHR2}) { $hash{END} = $pos + 1 unless ($hash{END}); print OUT join("\t",$sample,$gene,$chrom,$pos,$hash{END},$id,$hash{SVTYPE},$effect, $featureid,$codon,$aa,$rank,$gtinfo{SU}),"\n"; } } } } close IN;