From 8bfcd5b96f9b178a4a97c7d2388541f7c901c805 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Wed, 12 Feb 2020 10:21:48 -0600 Subject: [PATCH] adding option for GRCh37 dnaseqalign and options for SV calling --- alignment/dnaseqalign.sh | 2 +- variants/norm_annot.sh | 10 ++++++++-- variants/parse_pindel.pl | 2 +- variants/svcalling.sh | 5 ++++- variants/uniform_vcf_gt.pl | 6 +++++- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index 184c787..fe4606a 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -67,7 +67,7 @@ bwa mem -M -t $NPROC -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\t if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]] then k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam -elif [[ "${index_path}/genome.fa.alt" ]] +elif [[ -f "${index_path}/genome.fa.alt" ]] then k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam elif [[ $umi == 'umi' ]] diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh index 1f05c4e..776c306 100755 --- a/variants/norm_annot.sh +++ b/variants/norm_annot.sh @@ -10,12 +10,13 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:p:v:h opt +while getopts :r:p:v:sh opt do case $opt in p) pair_id=$OPTARG;; v) vcf=$OPTARG;; r) index_path=$OPTARG;; + s) skipnorm=1;; h) usage;; esac done @@ -40,4 +41,9 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf bgzip -f ${pair_id}.uniform.vcf j=${pair_id}.uniform.vcf.gz tabix -f $j -bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz +if [[ skipnorm==1 ]] +then + cp $j ${pair_id}.norm.vcf.gz +else + bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz +fi diff --git a/variants/parse_pindel.pl b/variants/parse_pindel.pl index 79e64f0..3f00316 100755 --- a/variants/parse_pindel.pl +++ b/variants/parse_pindel.pl @@ -79,7 +79,7 @@ while (my $line = <VCF>) { if ($hash{SVTYPE} eq 'DUP:TANDEM') { print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') { - if (abs($hash{SVLEN}) < 50) { + if (abs($hash{SVLEN}) < 20) { print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }else { $newalt = "<".$hash{SVTYPE}.">"; diff --git a/variants/svcalling.sh b/variants/svcalling.sh index d193da7..dcda4b7 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -104,7 +104,10 @@ then else /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $NPROC -G ${reffa} -t ${sbam} -a ${pair_id} fi - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.svaba.unfiltered.somatic.sv.vcf | bgzip > ${pair_id}.svaba.vcf.gz + vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf | vcf-sort -t temp > svaba.unfiltered.vcf + bash $baseDir/norm_annot.sh -r ${index_path} -p svaba -v svaba.unfiltered.vcf -s + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.norm.vcf | bgzip > svaba.vcf.gz + fi if [[ $method == 'lumpy' ]] diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl index fd497db..9bb7953 100755 --- a/variants/uniform_vcf_gt.pl +++ b/variants/uniform_vcf_gt.pl @@ -42,13 +42,17 @@ while (my $line = <VCF>) { foreach my $i (0..$#deschead) { $gtdata{$deschead[$i]} = $gtinfo[$i]; } - if ($gtdata{AD}){ + if ($gtdata{AD} =~ m/\d+,\d+/){ ($gtdata{RO},@alts) = split(/,/,$gtdata{AD}); $gtdata{AO} = join(",",@alts); $gtdata{DP} = $gtdata{RO}; foreach (@alts) { $gtdata{DP} += $_; } + } elsif ($gtdata{AD} =~ m/^\d+$/ && $gtdata{DP}){ + $gtdata{AO} = $gtdata{AD}; + $gtdata{RO} = $gtdata{DP} - $gtdata{AD}; + $gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO}); } elsif (exists $gtdata{NR} && exists $gtdata{NV}) { $gtdata{DP} = $gtdata{NR}; $gtdata{AO} = $gtdata{NV}; -- GitLab