From aed499148f69fb5c56706df79d4da9637a92bd8e Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Mon, 2 Dec 2019 13:11:54 -0600
Subject: [PATCH] update cnv, mutect2

---
 variants/cnvkit.sh      | 20 +++++++++++++++-----
 variants/germline_vc.sh | 31 ++++++++++++++++++++++---------
 variants/somatic_vc.sh  | 19 +++++++++++--------
 3 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh
index 1031ec3..b4a3eff 100755
--- a/variants/cnvkit.sh
+++ b/variants/cnvkit.sh
@@ -11,7 +11,7 @@ usage() {
   exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :b:p:n:f:t:c:uh opt
+while getopts :b:p:n:f:t:c:uqh opt
 do
     case $opt in
         b) sbam=$OPTARG;;
@@ -21,6 +21,7 @@ do
 	t) targets=$OPTARG;;
 	c) capture=$OPTARG;;
 	u) umi='umi';;
+	q) idtsnp=1;;
         h) usage;;
     esac
 done
@@ -54,15 +55,24 @@ source /etc/profile.d/modules.sh
 module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8
 unset DISPLAY
 
-#samtools index ${sbam}
-#bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/NGSCheckMate.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf
+if [[ $idtsnp == 1 ]]
+then
+    samtools index ${sbam}
+    bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/clinseq_prj/IDT_snps.120bp.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf
+fi
 
 cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
 cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
 cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr
 cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
-cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
-#cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.ballelecall.cns
+
+if [[ $idtsnp == 1 ]]
+then
+    cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.call.cns
+else 
+    cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
+fi
+
 cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf
 cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 >  ${pair_id}.cytoband.bed
 perl $baseDir/filter_cnvkit.pl ${pair_id}.call.cns
diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh
index 2ddc6f7..62006d2 100755
--- a/variants/germline_vc.sh
+++ b/variants/germline_vc.sh
@@ -11,13 +11,14 @@ usage() {
   exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :r:a:b:p:th opt
+while getopts :r:a:b:q:p:th opt
 do
     case $opt in
         r) index_path=$OPTARG;;
         p) pair_id=$OPTARG;;
         a) algo=$OPTARG;;
 	t) rna=1;;
+	q) pon==$OPTARG;; 
         h) usage;;
     esac
 done
@@ -82,11 +83,19 @@ then
     done
     cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa  --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf"
     vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz -
+elif [[ $algo == 'platypus' ]]
+then
+    module load platypus/gcc/0.8.1
+    bamlist=`join_by , *.bam`
+    Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf
+    vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
+    tabix platypus.vcf.gz
+    bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
 elif [[ $algo == 'gatk' ]]
 then
     gatk4_dbsnp=/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/dbSnp.gatk4.vcf.gz
     user=$USER
-    module load gatk/4.1.2.0
+    module load gatk/4.1.4.0
     gvcflist=''
     for i in *.bam; do
 	prefix="${i%.bam}"
@@ -101,14 +110,18 @@ then
     gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf
     bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz
     tabix ${pair_id}.gatk.vcf.gz
-elif [[ $algo == 'platypus' ]]
+elif [ $algo == 'mutect2' ]
 then
-    module load platypus/gcc/0.8.1
-    bamlist=`join_by , *.bam`
-    Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf
-    vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
-    tabix platypus.vcf.gz
-    bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
+  gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
+  module load gatk/4.1.4.0
+    for i in *.bam; do
+	prefix="${i%.bam}"
+	echo ${prefix}
+	java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g  -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${i} O=artifact_metrics.txt R=${reffa}
+	gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${i} --output ${prefix}.mutect.vcf
+	gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${prefix}.mutect.vcf -O ${prefix}.mutect.filt.vcf
+	vcf-sort ${prefix}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${prefix}.mutect.vcf.gz
+    done
 elif [[ $algo == 'strelka2' ]]
 then
     if [[ $rna == 1 ]]
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
index 8613f4c..e6d6c90 100755
--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -20,7 +20,7 @@ usage(){
 }
 
 OPTIND=1 # Reset OPTIND
-while getopts :n:t:p:r:x:y:i:j:a:b:h opt
+while getopts :n:t:p:r:x:y:i:j:q:a:b:h opt
 do
     case $opt in 
       r) index_path=$OPTARG;;
@@ -33,6 +33,7 @@ do
       j) mtumor=$OPTARG;;
       a) algo=$OPTARG;;
       b) tbed=$OPTARG;; 
+      q) pon==$OPTARG;; 
       h) usage;;
     esac
 done
@@ -54,6 +55,12 @@ then
     mtumor=tumor
     mnormal=normal
 fi
+if [[ -z $pon ]]
+then
+    ponopt='';
+else
+    ponopt="--pon $pon"
+fi
 
 if [[ -a "${index_path}/genome.fa" ]]
 then
@@ -105,14 +112,10 @@ if [ $algo == 'virmid' ]
 elif [ $algo == 'mutect2' ]
 then
   gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
-  user=$USER
-  module load gatk/4.x singularity/2.6.1 picard/2.10.3
-  mkdir /tmp/${user}
-  export TMP_HOME=/tmp/${user}
+  module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
   java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g  -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa}
-  singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" Mutect2 -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf
-  singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
-  module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
+  gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf
+  gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
   vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
 elif [ $algo == 'varscan' ]
 then
-- 
GitLab