From c2e0f1fac2b0a90fd2d87d2aba9d885855d22272 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Tue, 19 May 2020 08:57:45 -0500
Subject: [PATCH] union header bugs -- sample names and missing definitions

---
 variants/uniform_vcf_gt.pl | 9 +++++++--
 variants/union.sh          | 5 ++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl
index 661f7f2..85b8e06 100755
--- a/variants/uniform_vcf_gt.pl
+++ b/variants/uniform_vcf_gt.pl
@@ -9,13 +9,18 @@ open VCF, "gunzip -c $vcf|" or die $!;
 while (my $line = <VCF>) {
     chomp($line);
     if ($line =~ m/#/) {
+	next if ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/);
 	if ($line =~ m/#CHROM/) {
 	    print OUT "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">\n";
 	    print OUT "##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count\">\n";
 	    print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n";
 	    print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n";
-	}
-	unless ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/) {
+	    my ($c, $p,$i,$r,$a,$s,$f,$an,$fo,@snames) = split(/\t/, $line);
+	    foreach my $j (0..$#snames) {
+		$snames[$j] =~ s/\[|\]|\.consensus|\.final//g;
+	    }
+	    print OUT join("\t",$c, $p,$i,$r,$a,$s,$f,$an,$fo,@snames),"\n";
+	} else {
 	    print OUT $line,"\n";
 	}
 	next;
diff --git a/variants/union.sh b/variants/union.sh
index 5dd626c..8ecfe87 100755
--- a/variants/union.sh
+++ b/variants/union.sh
@@ -47,6 +47,9 @@ for i in ${dir}/*.vcf.gz; do
     fi
 done 
 
-perl $baseDir/unionvcf.pl ${index_path}/union.header.vcf $list2
+echo "##fileformat=VCFv4.2" > header.vcf
+zcat ${dir}/*.vcf.gz |grep "##" |grep -v '#fileformat' |sort -u |grep 'ALT\|FILTER\|FORMAT\|INFO' >> header.vcf
+
+perl $baseDir/unionvcf.pl header.vcf $list2
 perl $baseDir/vcfsorter.pl ${index_path}/genome.dict int.vcf |bgzip > ${pair_id}.union.vcf.gz
 
-- 
GitLab