From 91f715b4ae2ecb62afacec91c5ed00c545c357d7 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Wed, 6 Dec 2017 16:58:29 -0600
Subject: [PATCH] update svcalling for somatic

---
 alignment/markdups.sh  |  3 +--
 variants/somatic_vc.sh |  7 +++++--
 variants/svcalling.sh  | 12 +++++++-----
 variants/unionvcf.pl   |  2 +-
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/alignment/markdups.sh b/alignment/markdups.sh
index f040fc0..ad1b8c7 100644
--- a/alignment/markdups.sh
+++ b/alignment/markdups.sh
@@ -54,11 +54,10 @@ then
     java -Djava.io.tmpdir=./ -Xmx4g  -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
 elif [ $algo == 'fgbio_umi' ]   
 then
+    module load fgbio
     samtools index -@ $SLURM_CPUS_ON_NODE ${sbam}
-    source activate fgbiotools
     fgbio GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam -m 0
     fgbio CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:'
-    source deactivate
     module load bwa/intel/0.7.15
     samtools index ${pair_id}.consensus.bam
     samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
index 2c1dccb..37bc444 100644
--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -12,12 +12,14 @@ usage(){
   echo "-y --TumorID"
   echo "-i --NormalBAM used for Mantra in the case of UMI consensus"
   echo "-j --TumorBAM used for Mantra in the case of UMI consensus"
+  echo "-b --TargetBed"
+  
   echo "Example: bash somatic_vc.sh -a strelka2 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam"
   exit 1
 }
 
 OPTIND=1 # Reset OPTIND
-while getopts :n:t:r:x:y:i:j:a:h opt
+while getopts :n:t:r:x:y:i:j:a:b:h opt
 do
     case $opt in 
       r) index_path=$OPTARG;;
@@ -28,6 +30,7 @@ do
       i) mnormal=$OPTARG;;
       j) mtumor=$OPTARG;;
       a) algo=$OPTARG;;
+      b) tbed=$OPTARG;; 
       h) usage;;
     esac
 done
@@ -130,6 +133,6 @@ fi
 if [ $algo == 'lancet' ]
 then
     module load snpeff/4.3q lancet vcftools/0.1.14
-    lancet --tumor ${tumor} --normal ${normal} --ref $reffa --bed $target_panel --num-threads 16 > lancet.vcf
+    lancet --tumor ${tumor} --normal ${normal} --ref $reffa -B $tbed --num-threads 16 > lancet.vcf
     vcf-sort lancet.vcf | vcf-annotate -n --fill-type -n | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${pair_id}.lancet.vcf.gz
 fi
diff --git a/variants/svcalling.sh b/variants/svcalling.sh
index 504ff88..ea22332 100644
--- a/variants/svcalling.sh
+++ b/variants/svcalling.sh
@@ -86,12 +86,12 @@ fi
 #MERGE DELLY AND MAKE BED
 
 bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf
-perl $baseDir/vcf2bed.sv.pl delly.vcf > delly.bed
+perl $baseDir/vcf2bed.sv.pl delly.vcf | sort -V -k 1,1 -k 2,2n > delly.bed
 bgzip delly.vcf
 if [[ -n ${normal} ]]
 then
     tabix delly.vcf.gz
-    bcftools view -O z -o delly.vcf.gz -s ${keepid} ${pair_id}.delly.vcf.gz
+    bcftools view -O z -o ${pair_id}.delly.vcf.gz -s ${keepid} delly.vcf.gz
 else
     mv delly.vcf.gz ${pair_id}.delly.vcf.gz
 fi
@@ -110,19 +110,21 @@ then
     gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools  view -S -b - | samtools sort -o normal.splitters.bam -
     gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools  view -S  -b - | samtools sort -o normal.discordants.bam -
     speedseq sv -t $SLURM_CPUS_ON_NODE -o sssv -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed
-    bcftools view -O z -o sssv.vcf.gz -s ${keepid} ${pair_id}.sssv.sv.vcf.gz 
+    java -jar $SNPEFF_HOME/SnpSift.jar filter -n "GEN[0].SU > 1" sssv.sv.vcf.gz |bgzip > tumor.sssv.sv.vcf.gz
+    tabix tumor.sssv.sv.vcf.gz
+    bcftools view -O z -o ${pair_id}.sssv.sv.vcf.gz -s ${keepid} tumor.sssv.sv.vcf.gz
 else
     speedseq sv -t $SLURM_CPUS_ON_NODE -o ${pair_id}.sssv -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed   
 fi
 
-java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 2" ${pair_id}.sssv.sv.vcf.gz > lumpy.vcf
+java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 10" ${pair_id}.sssv.sv.vcf.gz > lumpy.vcf
 perl $baseDir/vcf2bed.sv.pl lumpy.vcf > lumpy.bed
 
 #COMPARE DELLY & LUMPY
 if [[ -n ${normal} ]]
 then
   bedtools multiinter -cluster -header -names novobreak delly lumpy -i novobreak.bed delly.bed lumpy.bed > sv.intersect.bed 
-  grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${tid}_${nid}.novobreak.vcf.gz  | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz
+  grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${pair_id}.novobreak.vcf.gz  | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz
 else
   bedtools multiinter -cluster -header -names delly lumpy -i delly.bed lumpy.bed > sv.intersect.bed 
 fi
diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl
index 699a7bb..f03c96d 100755
--- a/variants/unionvcf.pl
+++ b/variants/unionvcf.pl
@@ -15,7 +15,7 @@ my @sampleorder;
 
 my %headerlines;
 foreach $vcf (@vcffiles) {
-    $caller = (split(/\./,$vcf))[1];
+    $caller = (split(/\./,$vcf))[-3];
     open VCF, "gunzip -c $vcf|" or die $!;
     my @sampleids;
     while (my $line = <VCF>) {
-- 
GitLab