From 488f3a25c5501ad2592f7b44efe84d4373e0fc17 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Thu, 19 Mar 2020 11:27:13 -0500 Subject: [PATCH] sv filtering --- variants/filter_delly.pl | 3 ++- variants/filter_svaba.pl | 3 ++- variants/svcalling.sh | 21 ++++++++++++--------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/variants/filter_delly.pl b/variants/filter_delly.pl index 4d6cffc..95ea52e 100755 --- a/variants/filter_delly.pl +++ b/variants/filter_delly.pl @@ -15,7 +15,7 @@ W1:while (my $line = <IN>) { chomp($line); if ($line =~ m/^#/) { if ($line =~ m/^#CHROM/) { - print OUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n"; + print VCFOUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n"; my @header = split(/\t/,$line); ($chrom, $pos,$id,$ref,$alt,$score, $filter,$info,$format,@gtheader) = split(/\t/, $line); @@ -29,6 +29,7 @@ W1:while (my $line = <IN>) { } } print VCFOUT $line,"\n"; + next; } my ($chrom, $pos,$id,$ref,$alt,$score, $filter,$annot,$format,@gts) = split(/\t/, $line); diff --git a/variants/filter_svaba.pl b/variants/filter_svaba.pl index f34ce1c..474046b 100755 --- a/variants/filter_svaba.pl +++ b/variants/filter_svaba.pl @@ -15,7 +15,7 @@ W1:while (my $line = <IN>) { chomp($line); if ($line =~ m/^#/) { if ($line =~ m/^#CHROM/) { - print OUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n"; + print VCFOUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n"; my @header = split(/\t/,$line); ($chrom, $pos,$id,$ref,$alt,$score, $filter,$info,$format,@gtheader) = split(/\t/, $line); @@ -29,6 +29,7 @@ W1:while (my $line = <IN>) { } } print VCFOUT $line,"\n"; + next; } my ($chrom, $pos,$id,$ref,$alt,$score, $filter,$annot,$format,@gts) = split(/\t/, $line); diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 7ec4c1c..043a86c 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -77,7 +77,6 @@ then delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} #delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf else - $tid= #RUN DELLY delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} @@ -87,19 +86,23 @@ then #delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf fi #MERGE DELLY AND MAKE BED - + bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz if [[ $filter == 1 ]] then + if [[ -z $tid ]] + then + tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` + fi zgrep '#CHROM' ${pair_id}.delly.vcf.gz > ${pair_id}.delly.genefusion.txt - zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.delly.genefusion.txt + zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.dgf.txt mv ${pair_id}.delly.vcf.gz ${pair_id}.delly.ori.vcf.gz - bash $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz + perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz bgzip ${pair_id}.delly.vcf - cat ${pair_id}.potentialfusion.txt >> ${pair_id}.delly.genefusion.txt - fi + cat ${pair_id}.delly.potentialfusion.txt ${pair_id}.dgf.txt |sort -u > ${pair_id}.delly.genefusion.txt + fi elif [[ $method == 'svaba' ]] then if [[ -n ${normal} ]] @@ -120,11 +123,11 @@ then if [[ $filter == 1 ]] then zgrep '#CHROM' ${pair_id}.svaba.sv.vcf.gz > ${pair_id}.svaba.genefusion.txt - zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.svaba.genefusion.txt + zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u >> ${pair_id}.sgf.txt mv ${pair_id}.svaba.vcf.gz ${pair_id}.svaba.ori.vcf.gz - bash $baseDir/filter_svaba.pl -t $tid -p $pair_id -i ${pair_id}.svaba.ori.vcf.gz + perl $baseDir/filter_svaba.pl -t $tid -p $pair_id -i ${pair_id}.svaba.ori.vcf.gz -v bgzip ${pair_id}.svaba.vcf - cat ${pair_id}.potentialfusion.txt >> ${pair_id}.svaba.genefusion.txt + cat ${pair_id}.svaba.potentialfusion.txt ${pair_id}.sgf.txt | sort -u >> ${pair_id}.svaba.genefusion.txt fi elif [[ $method == 'lumpy' ]] then -- GitLab