From 488f3a25c5501ad2592f7b44efe84d4373e0fc17 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Thu, 19 Mar 2020 11:27:13 -0500
Subject: [PATCH] sv filtering

---
 variants/filter_delly.pl |  3 ++-
 variants/filter_svaba.pl |  3 ++-
 variants/svcalling.sh    | 21 ++++++++++++---------
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/variants/filter_delly.pl b/variants/filter_delly.pl
index 4d6cffc..95ea52e 100755
--- a/variants/filter_delly.pl
+++ b/variants/filter_delly.pl
@@ -15,7 +15,7 @@ W1:while (my $line = <IN>) {
   chomp($line);
   if ($line =~ m/^#/) {
     if ($line =~ m/^#CHROM/) {
-      print OUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n";
+      print VCFOUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n";
       my @header = split(/\t/,$line);
       ($chrom, $pos,$id,$ref,$alt,$score,
        $filter,$info,$format,@gtheader) = split(/\t/, $line);
@@ -29,6 +29,7 @@ W1:while (my $line = <IN>) {
       }
     }
     print VCFOUT $line,"\n";
+    next;
   }
   my ($chrom, $pos,$id,$ref,$alt,$score,
       $filter,$annot,$format,@gts) = split(/\t/, $line);
diff --git a/variants/filter_svaba.pl b/variants/filter_svaba.pl
index f34ce1c..474046b 100755
--- a/variants/filter_svaba.pl
+++ b/variants/filter_svaba.pl
@@ -15,7 +15,7 @@ W1:while (my $line = <IN>) {
   chomp($line);
   if ($line =~ m/^#/) {
     if ($line =~ m/^#CHROM/) {
-      print OUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n";
+      print VCFOUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n";
       my @header = split(/\t/,$line);
       ($chrom, $pos,$id,$ref,$alt,$score,
        $filter,$info,$format,@gtheader) = split(/\t/, $line);
@@ -29,6 +29,7 @@ W1:while (my $line = <IN>) {
       }
     }
     print VCFOUT $line,"\n";
+    next;
   }
   my ($chrom, $pos,$id,$ref,$alt,$score,
       $filter,$annot,$format,@gts) = split(/\t/, $line);
diff --git a/variants/svcalling.sh b/variants/svcalling.sh
index 7ec4c1c..043a86c 100755
--- a/variants/svcalling.sh
+++ b/variants/svcalling.sh
@@ -77,7 +77,6 @@ then
 	delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
 	#delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf
     else
-	$tid=
 	#RUN DELLY
 	delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam}
 	delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam}
@@ -87,19 +86,23 @@ then
 	#delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf
     fi
     #MERGE DELLY AND MAKE BED
-
+    
     bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz
     bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s
     java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz
     if [[ $filter == 1 ]]
     then
+	if [[ -z $tid ]]
+	then
+	    tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
+	fi
 	zgrep '#CHROM' ${pair_id}.delly.vcf.gz > ${pair_id}.delly.genefusion.txt
-	zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.delly.genefusion.txt
+	zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.dgf.txt
 	mv ${pair_id}.delly.vcf.gz ${pair_id}.delly.ori.vcf.gz
-	bash $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz
+	perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz
 	bgzip ${pair_id}.delly.vcf
-	cat ${pair_id}.potentialfusion.txt >> ${pair_id}.delly.genefusion.txt
-       fi
+	cat ${pair_id}.delly.potentialfusion.txt  ${pair_id}.dgf.txt |sort -u > ${pair_id}.delly.genefusion.txt
+    fi
 elif [[ $method == 'svaba' ]]
 then
     if [[ -n ${normal} ]]
@@ -120,11 +123,11 @@ then
     if [[ $filter == 1 ]]
     then
 	zgrep '#CHROM' ${pair_id}.svaba.sv.vcf.gz > ${pair_id}.svaba.genefusion.txt
-	zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u  >> ${pair_id}.svaba.genefusion.txt
+	zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u  >> ${pair_id}.sgf.txt
 	mv ${pair_id}.svaba.vcf.gz ${pair_id}.svaba.ori.vcf.gz
-	bash $baseDir/filter_svaba.pl -t $tid -p $pair_id -i ${pair_id}.svaba.ori.vcf.gz
+	perl $baseDir/filter_svaba.pl -t $tid -p $pair_id -i ${pair_id}.svaba.ori.vcf.gz -v
 	bgzip ${pair_id}.svaba.vcf
-	cat ${pair_id}.potentialfusion.txt >> ${pair_id}.svaba.genefusion.txt
+	cat ${pair_id}.svaba.potentialfusion.txt ${pair_id}.sgf.txt | sort -u >> ${pair_id}.svaba.genefusion.txt
        fi
 elif [[ $method == 'lumpy' ]]
 then
-- 
GitLab