From dd30007d4cb2237624cc72a35ca28d6ea8f14baf Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Fri, 25 Sep 2020 18:41:20 -0500 Subject: [PATCH] update svcalling --- variants/filter_svaba.pl | 28 ++++++++++++---------------- variants/svcalling.sh | 4 ++-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/variants/filter_svaba.pl b/variants/filter_svaba.pl index e962570..72a2eb3 100755 --- a/variants/filter_svaba.pl +++ b/variants/filter_svaba.pl @@ -41,11 +41,11 @@ W1:while (my $line = <IN>) { $hash{$key} = $val unless ($hash{$key}); } if (length($alt) > length($ref)) { - $hash{SVTYPE} = "INS"; - $hash{END} = $pos; + $hash{SVTYPE} = "INS"; + $hash{END} = $pos; }elsif (length($alt) < length($ref)) { - my $diff = substr($ref, length($alt)); - $hash{SVTYPE} = "DEL"; + my $diff = substr($ref, length($alt)); + $hash{SVTYPE} = "DEL"; $hash{END} = $pos + length($diff); } next unless ($hash{ANN}); @@ -92,7 +92,7 @@ W1:while (my $line = <IN>) { my ($allele,$effect,$impact,$gene,$geneid,$feature, $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx); - next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i); + next unless ($impact =~ m/HIGH|MODERATE|LOW/ || $effect =~ /splice/i); next if($effect eq 'sequence_feature'); $keeptrx = $trx; $keepforvcf = $gene; @@ -107,7 +107,6 @@ W1:while (my $line = <IN>) { push @nannot, $info; } } - my $newannot = join(";",@nannot); if ($hash{SVTYPE} eq 'INS' || ($hash{SVTYPE} eq 'DEL' && $keepforvcf !~ m/&/)) { if ($filter =~ m/LOWMAPQ|LowQual/i) { @@ -231,9 +230,6 @@ close IN; foreach my $id (keys %svpairs) { - if ($id =~ m/815016443/) { - warn "debugging\n"; - } my $alt1 = $svpairs{$id}{1}{alt}; my $alt2 = $svpairs{$id}{2}{alt}; my $svtype; @@ -242,14 +238,14 @@ foreach my $id (keys %svpairs) { }elsif ($alt2 =~ m/^\w\[/ && $alt1 =~ m/^\]/) { $svtype = 'INS'; }else { - $svtype = 'UNK'; + $svtype = 'UNK'; } - if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) { - if ($filter =~ m/LOWMAPQ|LowQual/i) { + if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) { + if ($filter =~ m/LOWMAPQ|LowQual/i) { $filter = 'FailedQC'.$filter; - } + } print VCFOUT $svpairs{$id}{1}{vcfline},"\n" - }elsif ($svtype eq 'DEL' && $svpairs{$id}{1}{span} && $svpairs{$id}{1}{span} > 9999) { - print DELFUS $svpairs{$id}{1}{fusionline},"\n"; - } + }elsif ($svtype eq 'DEL' && $svpairs{$id}{1}{span} && $svpairs{$id}{1}{span} > 9999) { + print DELFUS $svpairs{$id}{1}{fusionline},"\n"; + } } diff --git a/variants/svcalling.sh b/variants/svcalling.sh index c751db3..562d2ee 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -95,7 +95,7 @@ then java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz if [[ $filter == 1 ]] then - zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.dgf.txt + zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion|transcript_ablation' | sort -u > ${pair_id}.dgf.txt mv ${pair_id}.delly.vcf.gz ${pair_id}.delly.ori.vcf.gz echo "perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz" perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz @@ -126,7 +126,7 @@ then if [[ $filter == 1 ]] then - zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.sgf.txt + zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion|transcript_ablation' | sort -u > ${pair_id}.sgf.txt mv ${pair_id}.svaba.vcf.gz ${pair_id}.svaba.ori.vcf.gz perl $baseDir/filter_svaba.pl -t $tid -p ${pair_id} -i ${pair_id}.svaba.ori.vcf.gz -s ${pair_id}.svaba.sv.vcf.gz bgzip ${pair_id}.svaba.vcf -- GitLab