From 8bfcd5b96f9b178a4a97c7d2388541f7c901c805 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Wed, 12 Feb 2020 10:21:48 -0600
Subject: [PATCH] adding option for GRCh37 dnaseqalign and options for SV
 calling

---
 alignment/dnaseqalign.sh   |  2 +-
 variants/norm_annot.sh     | 10 ++++++++--
 variants/parse_pindel.pl   |  2 +-
 variants/svcalling.sh      |  5 ++++-
 variants/uniform_vcf_gt.pl |  6 +++++-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh
index 184c787..fe4606a 100644
--- a/alignment/dnaseqalign.sh
+++ b/alignment/dnaseqalign.sh
@@ -67,7 +67,7 @@ bwa mem -M -t $NPROC -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\t
 if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]]
 then
     k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam
-elif [[ "${index_path}/genome.fa.alt" ]]
+elif [[ -f "${index_path}/genome.fa.alt" ]]
 then
     k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam
 elif [[ $umi == 'umi' ]]
diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh
index 1f05c4e..776c306 100755
--- a/variants/norm_annot.sh
+++ b/variants/norm_annot.sh
@@ -10,12 +10,13 @@ usage() {
   exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :r:p:v:h opt
+while getopts :r:p:v:sh opt
 do
     case $opt in
         p) pair_id=$OPTARG;;
 	v) vcf=$OPTARG;;
 	r) index_path=$OPTARG;;
+	s) skipnorm=1;;
         h) usage;;
     esac
 done
@@ -40,4 +41,9 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
 bgzip -f ${pair_id}.uniform.vcf
 j=${pair_id}.uniform.vcf.gz
 tabix -f $j
-bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz
+if [[ skipnorm==1 ]]
+then
+    cp $j ${pair_id}.norm.vcf.gz
+else
+    bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz
+fi
diff --git a/variants/parse_pindel.pl b/variants/parse_pindel.pl
index 79e64f0..3f00316 100755
--- a/variants/parse_pindel.pl
+++ b/variants/parse_pindel.pl
@@ -79,7 +79,7 @@ while (my $line = <VCF>) {
   if ($hash{SVTYPE} eq 'DUP:TANDEM') {
     print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
   }elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') {
-    if (abs($hash{SVLEN}) < 50) {
+    if (abs($hash{SVLEN}) < 20) {
       print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
     }else {
       $newalt = "<".$hash{SVTYPE}.">";
diff --git a/variants/svcalling.sh b/variants/svcalling.sh
index d193da7..dcda4b7 100755
--- a/variants/svcalling.sh
+++ b/variants/svcalling.sh
@@ -104,7 +104,10 @@ then
     else
 	/project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $NPROC -G ${reffa} -t ${sbam} -a ${pair_id}
     fi
-    java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.svaba.unfiltered.somatic.sv.vcf | bgzip > ${pair_id}.svaba.vcf.gz
+    vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf | vcf-sort -t temp > svaba.unfiltered.vcf
+    bash $baseDir/norm_annot.sh -r ${index_path} -p svaba -v svaba.unfiltered.vcf -s
+    java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.norm.vcf | bgzip > svaba.vcf.gz
+    
 fi
 
 if [[ $method == 'lumpy' ]]
diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl
index fd497db..9bb7953 100755
--- a/variants/uniform_vcf_gt.pl
+++ b/variants/uniform_vcf_gt.pl
@@ -42,13 +42,17 @@ while (my $line = <VCF>) {
       foreach my $i (0..$#deschead) {
 	  $gtdata{$deschead[$i]} = $gtinfo[$i];
       }
-      if ($gtdata{AD}){
+      if ($gtdata{AD} =~ m/\d+,\d+/){
 	  ($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
 	  $gtdata{AO} = join(",",@alts);
 	  $gtdata{DP} = $gtdata{RO};
 	  foreach (@alts) {
 	      $gtdata{DP} += $_;
 	  }
+      } elsif ($gtdata{AD} =~ m/^\d+$/ && $gtdata{DP}){
+	  $gtdata{AO} = $gtdata{AD};
+	  $gtdata{RO} = $gtdata{DP} - $gtdata{AD};
+	  $gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
       } elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
 	      $gtdata{DP} = $gtdata{NR}; 	
 	      $gtdata{AO} = $gtdata{NV};
-- 
GitLab