# Patch summary (free-text preamble placed ahead of the first "diff --git" header).
#
# variants/uniform_vcf_gt.pl
#   * Header lines that declare the AO, AD, RO or DP FORMAT fields are skipped
#     up front; the script prints its own definitions of those four fields
#     immediately before the #CHROM line.
#   * The skip test is anchored to "##FORMAT=<ID=" and requires "," or ">"
#     right after the ID, so definitions of longer IDs that merely start with
#     one of the four names (e.g. DPR, ADF, ADR) are passed through untouched.
#   * The #CHROM line is split on tabs and "[", "]", ".consensus" and ".final"
#     are removed from every sample column name before the line is printed.
#
# variants/union.sh
#   * header.vcf is now built from the input VCFs: a "##fileformat=VCFv4.2"
#     line followed by the sorted, de-duplicated ##ALT / ##FILTER / ##FORMAT /
#     ##INFO lines, instead of reading ${index_path}/union.header.vcf.
#   * The grep is anchored to the "##KEY=" prefix so unrelated meta lines that
#     only mention one of those words are not copied into the header.
#
# NOTE(review): the leading indentation of every line and the exact position
# of the second blank context line in the union.sh hunk were reconstructed and
# may not match the target files byte-for-byte -- confirm against the working
# tree, or apply with `git apply --ignore-whitespace`. The blob hashes on the
# "index" lines describe the original added lines, not the edited ones.
diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl
index 661f7f21008897d707c753a3f9360eeacfcca108..85b8e06858e4731489ea082957bbf2049e4f821b 100755
--- a/variants/uniform_vcf_gt.pl
+++ b/variants/uniform_vcf_gt.pl
@@ -9,13 +9,18 @@ open VCF, "gunzip -c $vcf|" or die $!;
 while (my $line = <VCF>) {
     chomp($line);
     if ($line =~ m/#/) {
+        next if ($line =~ m/^##FORMAT=<ID=(?:AO|AD|RO|DP)[,>]/);
         if ($line =~ m/#CHROM/) {
             print OUT "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">\n";
             print OUT "##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count\">\n";
             print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n";
             print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n";
-        }
-        unless ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/) {
+            my ($c, $p,$i,$r,$a,$s,$f,$an,$fo,@snames) = split(/\t/, $line);
+            foreach my $j (0..$#snames) {
+                $snames[$j] =~ s/\[|\]|\.consensus|\.final//g;
+            }
+            print OUT join("\t",$c, $p,$i,$r,$a,$s,$f,$an,$fo,@snames),"\n";
+        } else {
             print OUT $line,"\n";
         }
         next;
diff --git a/variants/union.sh b/variants/union.sh
index 5dd626ce70070000a2e75e83380b5288a1aa110d..8ecfe87dbeefad6f691385763a1c9315b6ff2854 100755
--- a/variants/union.sh
+++ b/variants/union.sh
@@ -47,6 +47,9 @@ for i in ${dir}/*.vcf.gz; do
     fi
 done
 
-perl $baseDir/unionvcf.pl ${index_path}/union.header.vcf $list2
+echo "##fileformat=VCFv4.2" > header.vcf
+zcat ${dir}/*.vcf.gz |grep '^##\(ALT\|FILTER\|FORMAT\|INFO\)=' |sort -u >> header.vcf
+
+perl $baseDir/unionvcf.pl header.vcf $list2
 
 perl $baseDir/vcfsorter.pl ${index_path}/genome.dict int.vcf |bgzip > ${pair_id}.union.vcf.gz