diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 5e02d9ebfdbeeb79081387dab5c82632ab641d44..f8b2eaa1f7412c751c0a289ca4bdca5a71a2ba98 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -82,7 +82,7 @@ then vcf-annotate -n --fill-type ${pair_id}.vcf.gz | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf java -jar $PICARD/picard.jar SortVcf I=sam.vcf O=${pair_id}.sam.vcf R=${reffa} CREATE_INDEX=TRUE bgzip ${pair_id}.sam.vcf -elif [[ $algo == 'freebayes' ]] +elif [[ $algo == 'fb' ]] then module load freebayes/gcc/1.2.0 parallel/20150122 bamlist='' @@ -90,7 +90,7 @@ then bamlist="$bamlist --bam ${PWD}/${i}" done cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $NPROC "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" - vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - + vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.fb.vcf.gz - elif [[ $algo == 'platypus' ]] then module load platypus/gcc/0.8.1 @@ -117,7 +117,7 @@ then gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz -elif [ $algo == 'mutect2' ] +elif [ $algo == 'mutect' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index b5813c72391bff8277fc7fed88b4d155fbd9bb6b..f441cdf44fbf8d912b8ecef9df99e3f373eaa2b5 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -115,7 +115,7 @@ if [ $algo == 'virmid' ] module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz -elif [ $algo == 'mutect2' ] +elif [ $algo == 'mutect' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 59119ba234bea4693c1247063361ec3837bd23ce..5da21c2084f521cbb7d5680fcf739d4e40b1e922 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -64,39 +64,44 @@ mkdir temp if [[ $method == 'delly' ]] then - module load delly2/v0.7.7-multi + #module load delly2/v0.7.7-multi if [[ -n ${normal} ]] then #RUN DELLY echo -e "${nid}\tcontrol"> samples.tsv echo -e "${tid}\ttumor" >> samples.tsv - delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 filter -t BND -o delly_tra.bcf -f somatic -s samples.tsv delly_translocations.bcf - delly2 filter -t DUP -o delly_dup.bcf -f somatic -s samples.tsv delly_duplications.bcf - delly2 filter -t INV -o delly_inv.bcf -f somatic -s samples.tsv delly_inversions.bcf - delly2 filter -t DEL -o delly_del.bcf -f somatic -s samples.tsv delly_deletion.bcf - delly2 filter -t INS -o delly_ins.bcf -f somatic -s samples.tsv delly_insertion.bcf + delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} + #delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf + #delly2 filter -o ${pair_id}.delly_dup.bcf -f somatic -s samples.tsv ${pair_id}.delly_duplications.bcf + #delly2 filter -o ${pair_id}.delly_inv.bcf -f somatic -s samples.tsv ${pair_id}.delly_inversions.bcf + #delly2 filter -o ${pair_id}.delly_del.bcf -f somatic -s samples.tsv ${pair_id}.delly_deletion.bcf + #delly2 filter -o ${pair_id}.delly_ins.bcf -f somatic -s samples.tsv ${pair_id}.delly_insertion.bcf else #RUN DELLY - delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} - delly2 filter -t BND -o delly_tra.bcf -f germline delly_translocations.bcf - delly2 filter -t DUP -o delly_dup.bcf -f germline delly_duplications.bcf - delly2 filter -t INV -o delly_inv.bcf -f germline delly_inversions.bcf - delly2 filter -t DEL -o delly_del.bcf -f germline delly_deletion.bcf - delly2 filter -t INS -o delly_ins.bcf -f germline delly_insertion.bcf + delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} + #delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf + #delly2 filter -o ${pair_id}.delly_dup.bcf -f germline ${pair_id}.delly_duplications.bcf + #delly2 filter -o ${pair_id}.delly_inv.bcf -f germline ${pair_id}.delly_inversions.bcf + #delly2 filter -o ${pair_id}.delly_del.bcf -f germline ${pair_id}.delly_deletion.bcf + #delly2 filter -o ${pair_id}.delly_ins.bcf -f germline ${pair_id}.delly_insertion.bcf fi #MERGE DELLY AND MAKE BED - bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf - bgzip delly.vcf - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} delly.vcf | bgzip > ${pair_id}.delly.vcf.gz + + bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz + bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.delly.sv.norm.vcf.gz | bgzip > ${pair_id}.delly.sv.vcf.gz + bcftools concat -a -O v ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.dellyindel.vcf.gz + bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.indel -v ${pair_id}.dellyindel.vcf.gz -s + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.delly.indel.norm.vcf.gz | bgzip > ${pair_id}.delly.vcf.gz + elif [[ $method == 'svaba' ]] then if [[ -n ${normal} ]] @@ -107,16 +112,16 @@ then fi #Create SV FILE vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf | vcf-sort -t temp > svaba.sv.vcf - cat svaba.sv.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[0] >= 20 )" | bgzip > svaba.vcf.gz - tabix svaba.sv.vcf.gz + bgzip -f svaba.sv.vcf + tabix -f svaba.sv.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p svaba.sv -v svaba.sv.vcf.gz -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.sv.norm.vcf | bgzip > ${pair_id}.svaba.sv.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.sv.norm.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.svaba.sv.vcf.gz #Create INDEL FILE vcf-concat ${pair_id}.svaba.unfiltered*indel.vcf | vcf-sort -t temp > svaba.indel.vcf - cat svaba.indel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[0] >= 20 )" | bgzip > svaba.indel.vcf.gz - tabix svaba.indel.vcf.gz + bgzip -f svaba.indel.vcf + tabix -f svaba.indel.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p svaba.indel -v svaba.indel.vcf.gz -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.indel.norm.vcf | bgzip > ${pair_id}.svaba.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.indel.norm.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.svaba.vcf.gz elif [[ $method == 'lumpy' ]] then #MAKE FILES FOR LUMPY diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl index 9bb7953fdf9d677026a047a4390a3aa9db8bddbe..7b7a6f4814461fd8bf5a0ea380220cc802cbf612 100755 --- a/variants/uniform_vcf_gt.pl +++ b/variants/uniform_vcf_gt.pl @@ -49,14 +49,25 @@ while (my $line = <VCF>) { foreach (@alts) { $gtdata{DP} += $_; } - } elsif ($gtdata{AD} =~ m/^\d+$/ && $gtdata{DP}){ + } elsif (exists $gtdata{DV} && exists $gtdata{RV}) { + $gtdata{AO} = $gtdata{DV} + $gtdata{RV}; + $gtdata{RO} = $gtdata{DR} + $gtdata{RR}; + $gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO}); + $gtdata{DP} = $gtdata{RO}+$gtdata{AO}; + + } elsif (exists $gtdata{DR} && exists $gtdata{SR}){ $gtdata{AO} = $gtdata{AD}; - $gtdata{RO} = $gtdata{DP} - $gtdata{AD}; + $gtdata{DP} = $gtdata{AO} unless $gtdata{DP}; + if ($gtdata{DP} > $gtdata{AD}) { + $gtdata{RO} = $gtdata{DP} - $gtdata{AD}; + } else { + $gtdata{RO} = 0; + } $gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO}); } elsif (exists $gtdata{NR} && exists $gtdata{NV}) { - $gtdata{DP} = $gtdata{NR}; - $gtdata{AO} = $gtdata{NV}; - $gtdata{RO} = $gtdata{DP} - $gtdata{AO}; + $gtdata{DP} = $gtdata{NR}; + $gtdata{AO} = $gtdata{NV}; + $gtdata{RO} = $gtdata{DP} - $gtdata{AO}; } elsif (exists $gtdata{AO} && exists $gtdata{RO}) { $gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO}); $gtdata{DP} = $gtdata{RO};