From 9b7a55bddca028bf187ab8cfa7e01d4ef7237db3 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Fri, 7 Feb 2020 11:45:44 -0600
Subject: [PATCH] add options for alt pon and filtering of itdseek

---
 variants/cnvkit.sh  | 37 +++++++++++++++++++++++++++++--------
 variants/itdseek.sh | 11 ++++++++++-
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh
index 7836509..23a40e5 100755
--- a/variants/cnvkit.sh
+++ b/variants/cnvkit.sh
@@ -15,12 +15,10 @@ while getopts :b:p:n:t:r:uqh opt
 do
     case $opt in
         b) sbam=$OPTARG;;
+	d) paneldir=$OPTARG;;
         p) pair_id=$OPTARG;;
-	n) normals=$OPTARG;;
 	r) index_path=$OPTARG;;
-	t) targets=$OPTARG;;
 	u) umi='umi';;
-	q) idtsnp=1;;
         h) usage;;
     esac
 done
@@ -41,7 +39,7 @@ if [[ -z $SLURM_CPUS_ON_NODE ]]
 then
     SLURM_CPUS_ON_NODE=1
 fi
-if [[ -z $normals ]] || [[ -z $targets ]]
+if [[ -z $paneldir ]]
 then
     usage
 fi
@@ -53,14 +51,35 @@ else
     echo "Missing Fasta File: ${index_path}/genome.fa"
     usage
 fi
+if [[ -z $paneldir ]] 
+then
+    paneldir="UTSW_V3_pancancer"
+fi
 
-echo "${targets}targets.bed"
-echo "${targets}antitargets.bed"
-echo "${normals}"
+capture="$paneldir/targetpanel.bed"
+targets="$paneldir/cnvkit."
+normals="$paneldir/pon.cnn"
 
 source /etc/profile.d/modules.sh
 module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 java/oracle/jdk1.8.0_171 snpeff/4.3q
 
+if [[ -f "${paneldir}/pon.downsample.cnn" ]]
+then
+    bedtools coverage -sorted -g  ${index_path}/genomefile.txt -a ${capture} -b ${sbam} -hist > covhist.txt
+    grep ^all covhist.txt > genomecov.txt
+    sumdepth=`awk '{ sum+= $2*$3;} END {print sum;}' genomecov.txt`
+    total=`head -n 1 genomecov.txt |cut -f 4`
+    avgdepth=$((${sumdepth}/${total}))
+    if [[ "$avgdepth" -lt 1000 ]]
+    then
+	normals="${paneldir}/pon.downsample.cnn"
+    fi
+fi
+
+echo "${targets}targets.bed"
+echo "${targets}antitargets.bed"
+echo "${normals}"
+
 unset DISPLAY
 
 cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
@@ -68,7 +87,9 @@ cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcov
 cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr
 cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
 
-if [[ $idtsnp == 1 ]]
+numsnps=`grep -c -P "rs[0-9]+" ${targets}targets.bed`
+
+if [[ $numsnps > 100 ]]
 then
     samtools index ${sbam}
     java -jar /cm/shared/apps/gatk/3.8/target/package/GenomeAnalysisTK.jar -T UnifiedGenotyper -R ${reffa} --output_mode EMIT_ALL_SITES -L ${index_path}/IDT_snps.hg38.bed -o common_variants.vcf -glm BOTH -dcov 10000 -I ${sbam}
diff --git a/variants/itdseek.sh b/variants/itdseek.sh
index f40a330..0ea6ac8 100755
--- a/variants/itdseek.sh
+++ b/variants/itdseek.sh
@@ -9,7 +9,7 @@ usage() {
   exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :r:b:l:p:h opt
+while getopts :r:b:l:p:fh opt
 do
     case $opt in
         r) index_path=$OPTARG;;
@@ -17,6 +17,7 @@ do
         p) pair_id=$OPTARG;;
 	l) itdbed=$OPTARG;;
 	g) snpeffgeno=$OPTARG;;
+	f) filter=1;;
         h) usage;;
     esac
 done
@@ -58,3 +59,11 @@ stexe=`which samtools`
 samtools view -@ $SLURM_CPUS_ON_NODE -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz
 tabix ${pair_id}.itdseek.vcf.gz
 bcftools norm --fasta-ref $reffa -c w -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config $snpeffgeno - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz
+
+if [[ $filter == 1 ]]
+then
+    perl $baseDir/filter_itdseeker.pl -t ${pair_id} -d ${pair_id}.itdseek_tandemdup.vcf.gz
+    mv ${pair_id}.itdseek_tandemdup.vcf.gz ${pair_id}.itdseek_tandemdup.unfilt.vcf.gz
+    mv ${pair_id}.itdseek_tandemdup.pass.vcf ${pair_id}.itdseek_tandemdup.vcf
+    bgzip ${pair_id}.itdseek_tandemdup.pass.vcf
+fi
-- 
GitLab