From 7da44e40ef8df9c548da73bc2108b7c30a422d8c Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Thu, 19 Mar 2020 14:27:37 -0500 Subject: [PATCH] filter svaba to have 10kb + in fusion --- variants/filter_svaba.pl | 9 +++++++-- variants/svcalling.sh | 2 +- variants/uniform_vcf_gt.pl | 16 ++++++++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/variants/filter_svaba.pl b/variants/filter_svaba.pl index 0068be0..116f7da 100755 --- a/variants/filter_svaba.pl +++ b/variants/filter_svaba.pl @@ -114,7 +114,7 @@ W1:while (my $line = <IN>) { } print VCFOUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$newannot, $format,@gts),"\n"; - }elsif ($hash{SVTYPE} eq 'DEL' && $hash{SPAN} && $hash{SPAN} > 9999) { + } elsif ($hash{SVTYPE} eq 'DEL' && $hash{SPAN} && $hash{SPAN} > 9999) { my ($allele,$effect,$impact,$gene,$geneid,$feature, $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$keeptrx); @@ -230,6 +230,9 @@ close IN; foreach my $id (keys %svpairs) { + if ($id =~ m/815016443/) { + warn "debugging\n"; + } my $alt1 = $svpairs{$id}{1}{alt}; my $alt2 = $svpairs{$id}{2}{alt}; my $svtype; @@ -237,8 +240,10 @@ foreach my $id (keys %svpairs) { $svtype = 'DEL'; }elsif ($alt2 =~ m/^\w\[/ && $alt1 =~ m/^\]/) { $svtype = 'INS'; + }else { + $svtype = 'UNK'; } - if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/)) { + if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) { if ($filter =~ m/LOWMAPQ|LowQual/i) { $filter = 'FailedQC'.$filter; } diff --git a/variants/svcalling.sh b/variants/svcalling.sh index e3f682c..41671c5 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -123,7 +123,7 @@ then if [[ $filter == 1 ]] then zgrep '#CHROM' ${pair_id}.svaba.sv.vcf.gz > ${pair_id}.svaba.genefusion.txt - zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.sgf.txt + zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.sgf.txt mv ${pair_id}.svaba.vcf.gz ${pair_id}.svaba.ori.vcf.gz perl $baseDir/filter_svaba.pl -t $tid -p $pair_id -i ${pair_id}.svaba.ori.vcf.gz -s ${pair_id}.svaba.sv.vcf.gz bgzip ${pair_id}.svaba.vcf diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl index bef68c3..661f7f2 100755 --- a/variants/uniform_vcf_gt.pl +++ b/variants/uniform_vcf_gt.pl @@ -27,15 +27,19 @@ while (my $line = <VCF>) { my ($key,$val) = split(/=/,$a); $hash{$key} = $val; } - if ($alt =~ m/^chr/) { - $chr2 =~ m/(chr\w+):(\d+)/; - $chr2=$1; + if ($alt =~ m/chr(\w+):(\d+)/i) { + $chr2='chr'.$1; $p2 = $2; $hash{CHR2} = $chr2; $hash{'END'} = $p2; $annot .= ";CHR2=$chr2;END=$p2"; + }elsif ($alt =~ m/CHR(\w+):(\d+)/i) { + $chr2='chr'.$1; + $p2 = $2; + $hash{CHR2} = 'chr'.$1; + $hash{END} = $2; + $annot .= ";CHR2=$chr2;END=$p2"; } - my @deschead = split(/:/,$format); my $newformat = 'GT:DP:AD:AO:RO'; my @newgts = (); @@ -61,6 +65,10 @@ while (my $line = <VCF>) { } elsif ($gtdata{AD} =~ m/^\d+$/){ $gtdata{AO} = $gtdata{AD}; $gtdata{RO} = $gtdata{DP} - $gtdata{AO}; + if ($gtdata{RO} < 0) { + $gtdata{DP} += $gtdata{AO}; + $gtdata{RO} = $gtdata{DP} - $gtdata{AO}; + } $gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO}); } elsif (exists $gtdata{DV} && exists $gtdata{RV}) { $gtdata{AO} = $gtdata{DV} + $gtdata{RV}; -- GitLab