From e56380dff08486fa6ba2c1293ce49f58b2a3ca1b Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Sat, 7 Mar 2020 19:52:57 -0600 Subject: [PATCH] update svcalling for filtering for gene fusion --- variants/filter_pindel.pl | 7 ++---- variants/parse_pindel.pl | 4 +-- variants/svcalling.sh | 51 ++++++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/variants/filter_pindel.pl b/variants/filter_pindel.pl index 63f33e3..1802a79 100755 --- a/variants/filter_pindel.pl +++ b/variants/filter_pindel.pl @@ -88,14 +88,11 @@ foreach $file (@files) { $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx); next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i); next if($effect eq 'sequence_feature'); - if ($file eq $opt{sv}) { - next unless ($effect eq 'gene_fusion'); - } $keepforvcf = $gene; } next unless $keepforvcf; - if ($tumormaf[0] < 0.1) { - next unless ($outfile =~ m/pindel_tandemdup/); + if ($tumormaf[0] < 0.05) { + next unless ($outfile =~ m/tandemdup/); } my @fail = sort {$a cmp $b} keys %fail; next if (scalar(@fail) > 0); diff --git a/variants/parse_pindel.pl b/variants/parse_pindel.pl index ecc47d2..714fe94 100755 --- a/variants/parse_pindel.pl +++ b/variants/parse_pindel.pl @@ -64,7 +64,7 @@ while (my $line = <VCF>) { $gtdata{DP} += $_; } } - if (($gtdata{DP} && $gtdata{DP} < 10) || ()) { + if (($gtdata{DP} && $gtdata{DP} < 20) || ()) { $missingGT ++; } if ($gtdata{DP} == 0 || $gtdata{GT} eq './.') { push @newgts, '.:.:.:.:.'; @@ -79,7 +79,7 @@ while (my $line = <VCF>) { if ($hash{SVTYPE} eq 'DUP:TANDEM') { print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') { - if (abs($hash{SVLEN}) < 100) { + if (abs($hash{SVLEN}) < 30) { print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }else { $newalt = "<".$hash{SVTYPE}.">"; diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 5da21c2..77c2d26 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -76,10 +76,6 @@ then delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal} delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} #delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf - #delly2 filter -o ${pair_id}.delly_dup.bcf -f somatic -s samples.tsv ${pair_id}.delly_duplications.bcf - #delly2 filter -o ${pair_id}.delly_inv.bcf -f somatic -s samples.tsv ${pair_id}.delly_inversions.bcf - #delly2 filter -o ${pair_id}.delly_del.bcf -f somatic -s samples.tsv ${pair_id}.delly_deletion.bcf - #delly2 filter -o ${pair_id}.delly_ins.bcf -f somatic -s samples.tsv ${pair_id}.delly_insertion.bcf else #RUN DELLY delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} @@ -88,20 +84,17 @@ then delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} #delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf - #delly2 filter -o ${pair_id}.delly_dup.bcf -f germline ${pair_id}.delly_duplications.bcf - #delly2 filter -o ${pair_id}.delly_inv.bcf -f germline ${pair_id}.delly_inversions.bcf - #delly2 filter -o ${pair_id}.delly_del.bcf -f germline ${pair_id}.delly_deletion.bcf - #delly2 filter -o ${pair_id}.delly_ins.bcf -f germline ${pair_id}.delly_insertion.bcf fi #MERGE DELLY AND MAKE BED - bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz + bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.delly.sv.norm.vcf.gz | bgzip > ${pair_id}.delly.sv.vcf.gz - bcftools concat -a -O v ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.dellyindel.vcf.gz - bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.indel -v ${pair_id}.dellyindel.vcf.gz -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.delly.indel.norm.vcf.gz | bgzip > ${pair_id}.delly.vcf.gz - + java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz + if [[ $filter == 1 ]] + then + zgrep '#CHROM' ${pair_id}.delly.vcf.gz > ${pair_id}.delly.genefusion.txt + zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE ANN[*].FEATUREID FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.delly.genefusion.txt + fi elif [[ $method == 'svaba' ]] then if [[ -n ${normal} ]] @@ -111,17 +104,17 @@ then svaba run -p $NPROC -G ${reffa} -t ${sbam} -a ${pair_id} fi #Create SV FILE - vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf | vcf-sort -t temp > svaba.sv.vcf + vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf ${pair_id}.svaba.unfiltered*indel.vcf | vcf-sort -t temp > svaba.sv.vcf bgzip -f svaba.sv.vcf tabix -f svaba.sv.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p svaba.sv -v svaba.sv.vcf.gz -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.sv.norm.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.svaba.sv.vcf.gz - #Create INDEL FILE - vcf-concat ${pair_id}.svaba.unfiltered*indel.vcf | vcf-sort -t temp > svaba.indel.vcf - bgzip -f svaba.indel.vcf - tabix -f svaba.indel.vcf.gz - bash $baseDir/norm_annot.sh -r ${index_path} -p svaba.indel -v svaba.indel.vcf.gz -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.indel.norm.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.svaba.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.sv.norm.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20)" | bgzip > ${pair_id}.svaba.vcf.gz + + if [[ $filter == 1 ]] + then + zgrep '#CHROM' ${pair_id}.svaba.vcf.gz > ${pair_id}.svaba.genefusion.txt + zcat ${pair_id}.svaba.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE ANN[*].FEATUREID FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.svaba.genefusion.txt + fi elif [[ $method == 'lumpy' ]] then #MAKE FILES FOR LUMPY @@ -160,6 +153,20 @@ then java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz + if [[ $filter == 1 ]] + then + perl $baseDir/process_scripts/variants/filter_pindel.pl -d ${pair_id}.pindel_tandemdup.vcf.gz -s ${pair_id}.pindel_sv.vcf.gz -i ${pair_id}.pindel_indel.vcf.gz + mv ${pair_id}x.pindel_tandemdup.vcf.gz ${pair_id}.pindel_tandemdup.unfilt.vcf.gz + mv ${pair_id}.pindel_tandemdup.pass.vcf ${pair_id}.pindel_tandemdup.vcf + bgzip ${pair_id}.pindel_tandemdup.vcf + mv ${pair_id}.pindel_indel.pass.vcf ${pair_id}.pindel.vcf + bgzip ${pair_id}.pindel.vcf + mv ${pair_id}.pindel.sv.vcf.gz ${pair_id}.pindel.sv.unfilt.vcf.gz + mv ${pair_id}.pindel.sv.pass.vcf ${pair_id}.pindel.sv.vcf + bgzip ${pair_id}.pindel.sv.vcf + zgrep '#CHROM' ${pair_id}.pindel.sv.vcf.gz > ${pair_id}.pindel.genefusion.txt + zcat ${pair_id}.pindel.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHROM END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE ANN[*].FEATUREID FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.pindel.genefusion.txt + fi elif [[ $method == 'itdseek' ]] then stexe=`which samtools` -- GitLab