From c2e0f1fac2b0a90fd2d87d2aba9d885855d22272 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Tue, 19 May 2020 08:57:45 -0500 Subject: [PATCH] union header bugs -- samples names and missing definitions --- variants/uniform_vcf_gt.pl | 9 +++++++-- variants/union.sh | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl index 661f7f2..85b8e06 100755 --- a/variants/uniform_vcf_gt.pl +++ b/variants/uniform_vcf_gt.pl @@ -9,13 +9,18 @@ open VCF, "gunzip -c $vcf|" or die $!; while (my $line = <VCF>) { chomp($line); if ($line =~ m/#/) { + next if ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/); if ($line =~ m/#CHROM/) { print OUT "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">\n"; print OUT "##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count\">\n"; print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n"; print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n"; - } - unless ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/) { + my ($c, $p,$i,$r,$a,$s,$f,$an,$fo,@snames) = split(/\t/, $line); + foreach my $j (0..$#snames) { + $snames[$j] =~ s/\[|\]|\.consensus|\.final//g; + } + print OUT join("\t",$c, $p,$i,$r,$a,$s,$f,$an,$fo,@snames),"\n"; + } else { print OUT $line,"\n"; } next; diff --git a/variants/union.sh b/variants/union.sh index 5dd626c..8ecfe87 100755 --- a/variants/union.sh +++ b/variants/union.sh @@ -47,6 +47,9 @@ for i in ${dir}/*.vcf.gz; do fi done -perl $baseDir/unionvcf.pl ${index_path}/union.header.vcf $list2 +echo "##fileformat=VCFv4.2" > header.vcf +zcat ${dir}/*.vcf.gz |grep "##" |grep -v '#fileformat' |sort -u |grep 'ALT\|FILTER\|FORMAT\|INFO' >> header.vcf + +perl $baseDir/unionvcf.pl header.vcf $list2 perl $baseDir/vcfsorter.pl ${index_path}/genome.dict int.vcf |bgzip > ${pair_id}.union.vcf.gz -- GitLab