# transvar2bed.pl
use strict;
use warnings;
use List::Util qw(min max);

# Positional arguments: the mutation table to convert, the gene info table,
# the reference version handed to transvar, and any number of database names.
my ($inputFile, $geneInfoFile, $refversion, @databaseList) = @ARGV;
die "Usage: $0 <inputFile> <geneInfoFile> <refversion> [database ...]\n" unless(defined($refversion));
# Each database name is turned into a long option ("name" -> "--name").
my @databaseOptionList = map {"--$_"} @databaseList;
# %symbolHash:        value of the 'Symbol' column => 1
# %synonymSymbolHash: each '|'-separated entry of the 'Synonyms' column => its 'Symbol'
my %symbolHash = ();
my %synonymSymbolHash = ();
{
	# Three-argument open: the file name is never interpreted as a mode or a
	# command, and a missing/unreadable file stops the script instead of
	# silently leaving both symbol tables empty.
	open(my $reader, '<', $geneInfoFile) or die "Can't open gene info file '$geneInfoFile': $!";
	my @columnList = ();
	while(my $line = <$reader>) {
		chomp($line);
		if($line =~ s/^#//) {
			# A line starting with '#' is the header; it names the columns
			# of the data lines that follow.
			@columnList = split(/\t/, $line);
		} else {
			my %tokenHash = ();
			@tokenHash{@columnList} = split(/\t/, $line);
			my ($symbol, $synonyms) = @tokenHash{'Symbol', 'Synonyms'};
			# Rows without a symbol (e.g. data before the header, or short
			# rows) cannot be indexed.
			next unless(defined($symbol));
			$symbolHash{$symbol} = 1;
			# '-' marks "no synonyms".
			if(defined($synonyms) && $synonyms ne '-') {
				$synonymSymbolHash{$_} = $symbol foreach(split(/\|/, $synonyms));
			}
		}
	}
	close($reader);
}
# %geneRegionListHash:     column 1 of a database line => list of "col7:col5-col6" strings
# %geneRegionListListHash: column 1 of a database line => list of array refs, each holding
#                          the "col7:start-end" strings parsed from column 11
# NOTE(review): column 1 is presumably the gene name and column 7 the
# chromosome - confirm against the transvar database format.
my %geneRegionListHash = ();
my %geneRegionListListHash = ();
{
	# Run "transvar current" without a shell (list-form pipe open) so that
	# $refversion is passed as a single literal argument.
	open(my $configReader, '-|', 'transvar', 'current', '--refversion', $refversion) or die "Can't run transvar: $!";
	chomp(my @configList = <$configReader>);
	close($configReader);
	# Every "key: value" output line becomes one entry; the values looked up
	# below are the database file paths.
	my %configHash = map {$_->[0] => $_->[1]} map {[split(/: /, $_, 2)]} @configList;
	foreach my $database (@databaseList) {
		my $databaseFile = $configHash{$database};
		die "No transvar database file is configured for '$database'" unless(defined($databaseFile));
		# Read the database file directly instead of piping it through
		# a shell "cat" command.
		open(my $reader, '<', $databaseFile) or die "Can't open transvar database file '$databaseFile': $!";
		while(my $line = <$reader>) {
			chomp($line);
			my @tokenList = split(/\t/, $line);
			push(@{$geneRegionListHash{$tokenList[0]}}, "$tokenList[6]:$tokenList[4]-$tokenList[5]");
			# Column 11 is expected to look like "[(start, end), (start, end)]".
			if(defined($tokenList[10]) && $tokenList[10] =~ /^\[\((.+)\)\]$/) {
				my $regions = $1;
				my @regionList = map {/^([0-9]+), ([0-9]+)$/ ? "$tokenList[6]:$1-$2" : ()} split(/\), \(/, $regions);
				# Column 8 equal to '-' reverses the region order.
				@regionList = reverse @regionList if($tokenList[7] eq '-');
				push(@{$geneRegionListListHash{$tokenList[0]}}, \@regionList);
			}
		}
		close($reader);
	}
}
# Open the mutation table with an explicit read mode and stop with the system
# error if that fails. "-" still means standard input, as it did with the
# former two-argument open.
my $reader;
if($inputFile eq '-') {
	open($reader, '<&', \*STDIN) or die "Can't read standard input: $!";
} else {
	open($reader, '<', $inputFile) or die "Can't open input file '$inputFile': $!";
}
while(my $line = <$reader>) {
	chomp($line);
	s/^ *//, s/ *$// foreach(my ($sample, $gene, $mutation) = split(/\t/, $line));
	if(defined($symbolHash{$gene})) {
	} elsif(defined($gene = $synonymSymbolHash{$gene})) {
	} else {
		print STDERR join("\t", 'unknown_gene', $line), "\n";
		next;
	}
	if($mutation =~ /^[A-Z][0-9]+[A-Z]$/) { # missense
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[A-Z][0-9]+$/) { # aminoacid
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[A-Z][0-9]+[*]$/) { # nonsense
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[*][0-9]+[A-Z]$/) { # readthrough
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[A-Z][0-9]+_[A-Z][0-9]+/) { # deletion or insertion
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[A-Z][0-9]+del/) { # aminoacid deletion
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[A-Z][0-9]+fs/) { # frameshift
		printBedLine($line, getRegionList(transvar('p', $gene, $mutation)));
	} elsif($mutation =~ /^[0-9]+[+-][0-9]+/) { # splicing