Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
unionvcf.pl 3.21 KiB
#!/usr/bin/perl 
#migrate_db.pl

my $headerfile = shift @ARGV;
my @vcffiles = @ARGV;

open HEADER, "<$headerfile" or die $!;
open OUT, ">int.vcf" or die $!;
while (my $line = <HEADER>) {
    chomp($line);
    print OUT $line,"\n";;
}
close HEADER;
my @sampleorder;

my %headerlines;
foreach $vcf (@vcffiles) {
    $caller = (split(/\./,$vcf))[1];
    open VCF, "gunzip -c $vcf|" or die $!;
    my @sampleids;
    while (my $line = <VCF>) {
	chomp($line);
	if ($line =~ m/#/) {
	    if ($line =~ m/#CHROM/) {
		($chromhd, $posd,$idhd,$refhd,$althd,$scorehd,
		 $filterhd,$annothd,$formathd,@sampleids) = split(/\t/, $line);
		foreach $j (0..$#sampleids) {
		    $sampleids[$j] =~ s/\.final//g;
		}
		unless (@sampleorder) {
		    @sampleorder = @sampleids;
		    print OUT $line,"\n";
		}
		next;
	    }
	    next;
	}
	my ($chrom, $pos,$id,$ref,$alt,$score,
	    $filter,$annot,$format,@gts) = split(/\t/, $line);
	my %hash = ();
	foreach $a (split(/;/,$annot)) {
	    my ($key,$val) = split(/=/,$a);
	    $hash{$key} = $val;
	}
	my @deschead = split(/:/,$format);
	my $newformat = 'GT:DP:AD:AO:RO';
	my %newgts;
	my $missingGT = 0;
      FG:foreach my $i (0..$#gts) {
	  my $allele_info = $gts[$i];
	  my $sid = $sampleids[$i];
	  my @gtinfo = split(/:/,$allele_info);
	  my %gtdata;
	  if ($allele_info eq '.') {
	      $newgts{$sid} = '.:.:.:.:.';
	      $missingGT ++;
	      next FG;
	  }
	  foreach my $i (0..$#deschead) {
	      $gtdata{$deschead[$i]} = $gtinfo[$i];
	  }
	  if ($gtdata{GT} eq './.') {
	      $newgts{$sid} = '.:.:.:.:.';
	      $missingGT ++;
	      next FG;
	  }
	  if ($gtdata{AD}){
	      ($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
	      $gtdata{AO} = join(",",@alts);
	      $gtdata{DP} = $gtdata{RO};