From 1588fae4bfd4c341b62c6af9904c9e60a9f42489 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Tue, 7 May 2019 20:27:41 -0500
Subject: [PATCH] update union and snsp with mnv->snv

---
 variants/germline_vc.sh | 15 ++++++++++-----
 variants/norm_annot.sh  |  3 ++-
 variants/unionvcf.pl    |  6 +++---
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh
index 8b7ee19..a714ef7 100755
--- a/variants/germline_vc.sh
+++ b/variants/germline_vc.sh
@@ -33,21 +33,21 @@ if [[ -z $SLURM_CPUS_ON_NODE ]]
 then
     SLURM_CPUS_ON_NODE=1
 fi
-if [[ -a "${index_path}/dbSnp.vcf.gz" ]]
+if [[ -s "${index_path}/dbSnp.vcf.gz" ]]
 then
     dbsnp="${index_path}/dbSnp.vcf.gz"
 else 
     echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
     usage
 fi
-if [[ -a "${index_path}/GoldIndels.vcf.gz" ]]
+if [[ -s "${index_path}/GoldIndels.vcf.gz" ]]
 then
     knownindel="${index_path}/GoldIndels.vcf.gz"
 else 
     echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz"
     usage
 fi
-if [[ -a "${index_path}/genome.fa" ]]
+if [[ -s "${index_path}/genome.fa" ]]
 then
     reffa="${index_path}/genome.fa"
     dict="${index_path}/genome.dict"
@@ -87,7 +87,10 @@ then
     gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
     user=$USER
     module load gatk/4.x singularity/2.6.1
-    mkdir /tmp/${user}
+    if [[ ! -e "/tmp/${user}" ]]
+    then
+	mkdir /tmp/${user}
+    fi
     export TMP_HOME=/tmp/${user}
     gvcflist=''
     for i in *.bam; do
@@ -98,7 +101,9 @@ then
 	
 	gvcflist="$gvcflist --variant ${prefix}.gatk.g.vcf"
     done
-    singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenotypeGVCFs $gvcflist -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf
+    interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' |paste -sd "," -`
+    singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb --intervals $interval
+    singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf
     bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz
     tabix ${pair_id}.gatk.vcf.gz
 elif [[ $algo == 'platypus' ]]
diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh
index 4f89845..7aafd26 100755
--- a/variants/norm_annot.sh
+++ b/variants/norm_annot.sh
@@ -30,4 +30,5 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
 bgzip -f ${pair_id}.uniform.vcf
 j=${pair_id}.uniform.vcf.gz
 tabix -f $j
-bcftools norm -m - -O z -o ${pair_id}.norm.vcf.gz $j
+bcftools norm -m - -O v $j  | /project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub - -a -o ${pair_id}.norm.vcf
+bgzip -f ${pair_id}.norm.vcf
diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl
index 37452dc..a4db69e 100755
--- a/variants/unionvcf.pl
+++ b/variants/unionvcf.pl
@@ -125,10 +125,10 @@ foreach $vcf (@vcffiles) {
     my @filts = split(";",$filter);
     my %filterqc = map {$_ => 1} @filts;
     delete $filterqc{'.'};
-    if ($gtfilt{'StrandBias'} || ($hash{FS} && $hash{FS} > 60) || 
-	($hash{SAP} && $hash{SAP} > 20)) {
+    if ($gtfilt{'StrandBias'} || ($hash{FS} && $hash{FS} > 60)) { # ||($hash{SAP} && $hash{SAP} > 20)) {
 	$filterqc{strandBias} = 1;
-    }if (scalar(keys %filterqc) > 1) {
+	$annot .= ';strandBias=1';
+    }if (scalar(keys %filterqc) > 0) {
 	$filter = join(";",keys %filterqc);
     }else {
 	$filter = '.';
-- 
GitLab