From a65f5d7aba3a6ebc6f169093cec7fce2c83468b4 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Sat, 7 Jul 2018 15:56:22 -0500
Subject: [PATCH] update norm/uniform

---
 variants/annotvcf.sh        |  0
 variants/cnvkit.sh          |  0
 variants/filter_cnvkit.pl   |  0
 variants/gatkrunner.sh      |  0
 variants/germline_vc.sh     |  0
 variants/norm_annot.sh      | 34 ++++++++++++++++++++++++++++++++++
 variants/somatic_callers.sh |  0
 variants/somatic_vc.sh      |  0
 variants/svannot.pl         |  0
 variants/svcalling.sh       |  0
 variants/uniform_vcf_gt.pl  | 16 +++++++++++-----
 variants/union.sh           |  0
 12 files changed, 45 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 variants/annotvcf.sh
 mode change 100644 => 100755 variants/cnvkit.sh
 mode change 100644 => 100755 variants/filter_cnvkit.pl
 mode change 100644 => 100755 variants/gatkrunner.sh
 mode change 100644 => 100755 variants/germline_vc.sh
 create mode 100755 variants/norm_annot.sh
 mode change 100644 => 100755 variants/somatic_callers.sh
 mode change 100644 => 100755 variants/somatic_vc.sh
 mode change 100644 => 100755 variants/svannot.pl
 mode change 100644 => 100755 variants/svcalling.sh
 mode change 100644 => 100755 variants/union.sh

diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh
old mode 100644
new mode 100755
diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh
old mode 100644
new mode 100755
diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl
old mode 100644
new mode 100755
diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh
old mode 100644
new mode 100755
diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh
old mode 100644
new mode 100755
diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh
new file mode 100755
index 0000000..6134c3b
--- /dev/null
+++ b/variants/norm_annot.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#union.sh
+
+usage() {
+  echo "-h Help documentation for gatkrunner.sh"
+  echo "-r  --Reference Genome: GRCh38 or GRCm38"
+  echo "-p  --Prefix for output file name"
+  echo "-v  --VCF File" 
+  echo "Example: bash union.sh -p prefix -r /path/GRCh38"
+  exit 1
+}
+OPTIND=1 # Reset OPTIND
+while getopts :r:p:v:h opt
+do
+    case $opt in
+        r) index_path=$OPTARG;;
+        p) pair_id=$OPTARG;;
+	v) vcf=$OPTARG;;
+        h) usage;;
+    esac
+done
+function join_by { local IFS="$1"; shift; echo "$*"; }
+shift $(($OPTIND -1))
+baseDir="`dirname \"$0\"`"
+
+source /etc/profile.d/modules.sh
+module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q 
+
+prefix="${vcf%.vcf.gz}"
+perl $baseDir\/uniform_vcf_gt.pl $vcf
+bgzip -f ${prefix}.uniform.vcf
+j=${prefix}.uniform.vcf.gz
+tabix -f $j
+bcftools norm -m - -O z -o ${prefix}.norm.vcf.gz $j
diff --git a/variants/somatic_callers.sh b/variants/somatic_callers.sh
old mode 100644
new mode 100755
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
old mode 100644
new mode 100755
diff --git a/variants/svannot.pl b/variants/svannot.pl
old mode 100644
new mode 100755
diff --git a/variants/svcalling.sh b/variants/svcalling.sh
old mode 100644
new mode 100755
diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl
index 7dd813e..5f744a4 100755
--- a/variants/uniform_vcf_gt.pl
+++ b/variants/uniform_vcf_gt.pl
@@ -9,6 +9,12 @@ open VCF, "gunzip -c $vcf|" or die $!;
 while (my $line = <VCF>) {
     chomp($line);
     if ($line =~ m/#/) {
+	if ($line =~ m/#CHROM/) {
+	    print OUT "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">\n";
+	    print OUT "##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count\">\n";
+	    print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n";
+	    print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n";
+	}
 	print OUT $line,"\n";
 	next;
     }
@@ -34,11 +40,6 @@ while (my $line = <VCF>) {
       foreach my $i (0..$#deschead) {
 	  $gtdata{$deschead[$i]} = $gtinfo[$i];
       }
-      if ($gtdata{DP} == 0 || $gtdata{GT} eq './.') {
-	  push @newgts, '.:.:.:.:.';
-	  $missingGT ++;
-	  next FG;
-      }
       if ($gtdata{AD}){
 	  ($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
 	  $gtdata{AO} = join(",",@alts);
@@ -60,6 +61,11 @@ while (my $line = <VCF>) {
       if ($gtdata{DP} && $gtdata{DP} < 5) {
 	  $missingGT ++;
       }
+      if ($gtdata{DP} == 0 || $gtdata{GT} eq './.') { # no usable call: emit an all-missing genotype
+	  push @newgts, '.:.:.:.:.';
+	  $missingGT ++ unless ($gtdata{DP} && $gtdata{DP} < 5); # low-depth samples were already counted just above
+	  next FG;
+      }
       push @newgts, join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
   }
     next if ($missingGT == scalar(@gts));
diff --git a/variants/union.sh b/variants/union.sh
old mode 100644
new mode 100755
-- 
GitLab