From ec1a962cb9fe101268cd03d97afade8c0f2942be Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Fri, 31 Jul 2020 11:13:23 -0500
Subject: [PATCH] update snpeff ref; remove unnecessary ref data checks

---
 variants/germline_vc.sh | 25 ++++++++-----------------
 variants/somatic_vc.sh  | 23 ++++++-----------------
 variants/svcalling.sh   |  3 ++-
 3 files changed, 16 insertions(+), 35 deletions(-)

diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh
index cd1a1c7..0dc3659 100755
--- a/variants/germline_vc.sh
+++ b/variants/germline_vc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash
 #germline_vc.sh
 
 usage() {
@@ -36,21 +36,7 @@ if [[ -z $NPROC ]]
 then
     NPROC=`nproc`
 fi
-if [[ -s "${index_path}/dbSnp.vcf.gz" ]]
-then
-    dbsnp="${index_path}/dbSnp.vcf.gz"
-    gatk4_dbsnp="${index_path}/dbSnp.gatk4.vcf.gz"
-else 
-    echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
-    usage
-fi
-if [[ -s "${index_path}/GoldIndels.vcf.gz" ]]
-then
-    knownindel="${index_path}/GoldIndels.vcf.gz"
-else 
-    echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz"
-    usage
-fi
+
 if [[ -s "${index_path}/genome.fa" ]]
 then
     reffa="${index_path}/genome.fa"
@@ -112,6 +98,12 @@ then
     bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
 elif [[ $algo == 'gatk' ]]
 then
+    gatk4_dbsnp="${index_path}/dbSnp.gatk4.vcf.gz"
+    if [[ ! -f "${index_path}/dbSnp.gatk4.vcf.gz" ]]
+    then
+	echo "Missing dbSNP File: ${index_path}/dbSnp.gatk4.vcf.gz"
+	usage
+    fi
     user=$USER
     module load gatk/4.1.4.0
     gvcflist=''
@@ -129,7 +121,6 @@ then
     tabix ${pair_id}.gatk.vcf.gz
 elif [ $algo == 'mutect' ]
 then
-  gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
   module load gatk/4.1.4.0 parallel/20150122
   threads=`expr $NPROC / 2`
   bamlist=''
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
index 94d4ece..d3701aa 100755
--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -56,7 +56,6 @@ if [[ -f $pon ]]
 then
     ponopt="--pon $pon"
 fi
-
 if [[ -a "${index_path}/genome.fa" ]]
 then
     reffa="${index_path}/genome.fa"
@@ -65,21 +64,7 @@ else
     echo "Missing Fasta File: ${index_path}/genome.fa"
     usage
 fi
-if [[ -a "${index_path}/dbSnp.vcf.gz" ]]
-then
-    dbsnp="${index_path}/dbSnp.vcf.gz"
-else 
-    echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
-    usage
-fi
 
-if [[ -a "${index_path}/cosmic.vcf.gz" ]]
-then
-    cosmic=${index_path}/cosmic.vcf.gz
-else 
-    echo "Missing InDel File: ${index_path}/cosmic.vcf.gz"
-    usage
-fi
 baseDir="`dirname \"$0\"`"
 
 interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'`
@@ -96,7 +81,6 @@ then
     nid=`samtools view -H ${normal} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
 fi
 
-
 if [ $algo == 'strelka2' ]
 then
     module load strelka/2.9.10 manta/1.3.1 
@@ -127,6 +111,12 @@ then
 elif [ $algo == 'virmid' ]
 then 
     module load virmid/1.2
+    cosmic=${index_path}/cosmic.vcf.gz
+    if [[ ! -f "${index_path}/cosmic.vcf.gz" ]]
+    then
+	echo "Missing COSMIC File: ${index_path}/cosmic.vcf.gz"
+	usage
+    fi
     virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $NPROC -M 2000 -c1 10 -c2 10
     perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf
     perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf
@@ -135,7 +125,6 @@ then
     vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz
 elif [ $algo == 'mutect' ]
 then
-    gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
     module load gatk/4.1.4.0 parallel/20150122
     threads=`expr $NPROC / 2`
     gatk --java-options "-Xmx20g" Mutect2 $ponopt  --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval
diff --git a/variants/svcalling.sh b/variants/svcalling.sh
index a14ff60..e030948 100755
--- a/variants/svcalling.sh
+++ b/variants/svcalling.sh
@@ -11,7 +11,7 @@ usage() {
   exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :r:p:b:t:x:c:y:n:l:a:hf opt
+while getopts :r:p:b:t:x:c:g:y:n:l:a:hf opt
 do
     case $opt in
         r) index_path=$OPTARG;;
@@ -22,6 +22,7 @@ do
         x) tid=$OPTARG;;
         y) nid=$OPTARG;;
 	f) filter=1;;
+	g) snpeffgeno=$OPTARG;;
         b) sbam=$OPTARG;;
 	c) tbed=$OPTARG;;
 	l) itdbed=$OPTARG;;
-- 
GitLab