From 11a5dfe582646d273c097bbd16da888b78d8ac65 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Fri, 15 Dec 2017 10:21:38 -0600
Subject: [PATCH] final bugs somatic/sv workflows

---
 alignment/markdups.sh  |  5 ++---
 variants/somatic_vc.sh | 12 ++++++------
 variants/svcalling.sh  | 30 ++++++++++++++++--------------
 3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/alignment/markdups.sh b/alignment/markdups.sh
index ad1b8c7..a6ca732 100644
--- a/alignment/markdups.sh
+++ b/alignment/markdups.sh
@@ -54,11 +54,10 @@ then
     java -Djava.io.tmpdir=./ -Xmx4g  -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
 elif [ $algo == 'fgbio_umi' ]   
 then
-    module load fgbio
+    module load fgbio bwa/intel/0.7.15
     samtools index -@ $SLURM_CPUS_ON_NODE ${sbam}
-    fgbio GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam -m 0
+    fgbio GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam -e 0 -m 0
     fgbio CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:'
-    module load bwa/intel/0.7.15
     samtools index ${pair_id}.consensus.bam
     samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam
     gzip ${pair_id}.consensus.R1.fastq
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
index 23b53ac..be92e24 100644
--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -86,7 +86,7 @@ if [ $algo == 'strelka2' ]
     manta/runWorkflow.py -m local -j 8
     configureStrelkaSomaticWorkflow.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
     strelka/runWorkflow.py -m local -j 8
-    vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') & (GEN[*].DP >= 10))" | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${pair_id}.strelka.vcf.gz
+    vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') & (GEN[*].DP >= 10))" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka.vcf.gz
 fi
 
 if [ $algo == 'virmid' ]
@@ -95,21 +95,21 @@ if [ $algo == 'virmid' ]
     virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $SLURM_CPUS_ON_NODE -M 2000 -c1 10 -c2 10
     perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf
     perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf
-    vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' | bgzip > ${pair_id}.virmid.vcf.gz
+    vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz
 fi
 
 if [ $algo == 'speedseq' ]
   then 
     module load snpeff/4.3q speedseq/20160506 samtools/1.6 vcftools/0.1.14
     speedseq somatic -q 10 -t $SLURM_CPUS_ON_NODE -o sssom ${reffa} ${normal} ${tumor}
-    vcf-annotate -H -n --fill-type sssom.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${pair_id}.sssom.vcf.gz
+    vcf-annotate -H -n --fill-type sssom.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.sssom.vcf.gz
 fi
 
 if [ $algo == 'mutect2' ]
 then
   module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14
   cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
-  vcf-concat ${tid}*.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${pair_id}.pmutect.vcf.gz
+  vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.pmutect.vcf.gz
 fi
 
 if [ $algo == 'varscan' ]
@@ -119,7 +119,7 @@ then
   sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${normal} --samtools "-C 50 -f ${reffa}"  > n.mpileup
   VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1
   VarScan copynumber n.mpileup t.mpileup vscancnv 
-  vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' | bgzip > ${tid}_${nid}.varscan.vcf.gz
+  vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${tid}_${nid}.varscan.vcf.gz
 fi
 
 if [ $algo == 'shimmer' ]
@@ -127,7 +127,7 @@ then
     module load snpeff/4.3q shimmer/0.1.1 samtools/1.6  vcftools/0.1.14
     shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
     perl /project/PHG/PHG_Clinical/clinseq_workflows/scripts/add_readct_shimmer.pl
-    vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' | bgzip > ${pair_id}.shimmer.vcf.gz
+    vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz
 fi
 
 if [ $algo == 'lancet' ]
diff --git a/variants/svcalling.sh b/variants/svcalling.sh
index ea22332..8e715ca 100644
--- a/variants/svcalling.sh
+++ b/variants/svcalling.sh
@@ -50,13 +50,13 @@ fi
 module load  speedseq/20160506 novoBreak/v1.1.3 delly2/v0.7.7-multi samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
 mkdir temp
 
-if [[ -n ${normal} ]]
-then
-  run_novoBreak.sh /cm/shared/apps/novoBreak/novoBreak_distribution_v1.1.3rc ${reffa} ${sbam} ${normal} $SLURM_CPUS_ON_NODE
-  perl $baseDir/vcf2bed.sv.pl novoBreak.pass.flt.vcf |sort -T temp -V -k 1,1 -k 2,2n > novobreak.bed
-  mv novoBreak.pass.flt.vcf ${pair_id}.novobreak.vcf
-  bgzip ${pair_id}.novobreak.vcf
-fi
+#if [[ -n ${normal} ]]
+#then
+  #run_novoBreak.sh /cm/shared/apps/novoBreak/novoBreak_distribution_v1.1.3rc ${reffa} ${sbam} ${normal} $SLURM_CPUS_ON_NODE
+  #perl $baseDir/vcf2bed.sv.pl novoBreak.pass.flt.vcf |sort -T temp -V -k 1,1 -k 2,2n > novobreak.bed
+  #mv novoBreak.pass.flt.vcf ${pair_id}.novobreak.vcf
+  #bgzip ${pair_id}.novobreak.vcf
+#fi
 if [[ -n ${normal} ]]
 then
   #RUN DELLY
@@ -121,13 +121,15 @@ java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 10" ${pair_id}.sssv.sv.vc
 perl $baseDir/vcf2bed.sv.pl lumpy.vcf > lumpy.bed
 
 #COMPARE DELLY & LUMPY
-if [[ -n ${normal} ]]
-then
-  bedtools multiinter -cluster -header -names novobreak delly lumpy -i novobreak.bed delly.bed lumpy.bed > sv.intersect.bed 
-  grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${pair_id}.novobreak.vcf.gz  | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz
-else
-  bedtools multiinter -cluster -header -names delly lumpy -i delly.bed lumpy.bed > sv.intersect.bed 
-fi
+#if [[ -n ${normal} ]]
+#then
+  #bedtools multiinter -cluster -header -names novobreak delly lumpy -i novobreak.bed delly.bed lumpy.bed > sv.intersect.bed
+  #zcat ${pair_id}.novobreak.vcf.gz | perl -p -e "s/SPIKEIN/${tid}/" |bgzip > svt1.vcf.gz
+  #grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${pair_id}.novobreak.vcf.gz  | perl -p -e "s/SPIKEIN/${tid}/" |bgzip > svt1.vcf.gz
+## NOTE: no else branch -- the delly/lumpy multiinter below runs unconditionally
+#fi
+
+bedtools multiinter -cluster -header -names delly lumpy -i delly.bed lumpy.bed > sv.intersect.bed 
 grep delly sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v 'start' |grep -v 'novobreak' | bedtools intersect -header -b stdin -a ${pair_id}.delly.vcf.gz |bgzip > svt2.vcf.gz
 grep lumpy sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v 'start' |grep -v 'delly' |grep -v 'novobreak' | bedtools intersect -header -b stdin -a ${pair_id}.sssv.sv.vcf.gz |bgzip > svt3.vcf.gz
 
-- 
GitLab