From f7278ea12986b9cb282d1ee18821154610552f85 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Mon, 1 Oct 2018 11:44:43 -0500 Subject: [PATCH] add pindel --- variants/svcalling.sh | 149 +++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 82 deletions(-) diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 3766769..b0bcaae 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -11,14 +11,16 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:a:b:n:k:p:h opt +while getopts :r:p:b:t:i:k:n:m:h opt do case $opt in r) index_path=$OPTARG;; p) pair_id=$OPTARG;; b) sbam=$OPTARG;; - k) keepid=$OPTARG;; + k) tid=$OPTARG;; + i) nid=$OPTARG;; n) normal=$OPTARG;; + m) method=$OPTARG;; h) usage;; esac done @@ -50,92 +52,75 @@ source /etc/profile.d/modules.sh module load speedseq/20160506 novoBreak/v1.1.3 delly2/v0.7.7-multi samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 mkdir temp -#if [[ -n ${normal} ]] -#then - #run_novoBreak.sh /cm/shared/apps/novoBreak/novoBreak_distribution_v1.1.3rc ${reffa} ${sbam} ${normal} $SLURM_CPUS_ON_NODE - #perl $baseDir/vcf2bed.sv.pl novoBreak.pass.flt.vcf |sort -T temp -V -k 1,1 -k 2,2n > novobreak.bed - #mv novoBreak.pass.flt.vcf ${pair_id}.novobreak.vcf - #bgzip ${pair_id}.novobreak.vcf -#fi -if [[ -n ${normal} ]] +genomefiledate=`find ${reffa} -maxdepth 0 -printf "%TY%Tm%Td\n"` + +if [[ $method == 'pindel' ]] then - #RUN DELLY - delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 filter -t BND -o delly_tra.bcf -f somatic -s samples.tsv delly_translocations.bcf - delly2 filter -t DUP -o delly_dup.bcf -f somatic -s samples.tsv delly_duplications.bcf - delly2 filter -t INV -o delly_inv.bcf -f somatic -s samples.tsv delly_inversions.bcf - delly2 filter -t DEL -o delly_del.bcf -f somatic -s samples.tsv delly_deletion.bcf - delly2 filter -t INS -o delly_ins.bcf -f somatic -s samples.tsv delly_insertion.bcf -else -#RUN DELLY - delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} - delly2 filter -t BND -o delly_tra.bcf -f germline delly_translocations.bcf - delly2 filter -t DUP -o delly_dup.bcf -f germline delly_duplications.bcf - delly2 filter -t INV -o delly_inv.bcf -f germline delly_inversions.bcf - delly2 filter -t DEL -o delly_del.bcf -f germline delly_deletion.bcf - delly2 filter -t INS -o delly_ins.bcf -f germline delly_insertion.bcf + module load samtools/1.6 pindel/0.2.5-intel snpeff/4.3q + echo -e "${sbam}\t400\t${tid}" > ${pair_id}.pindel.config + if [[ -n ${normal} ]] + then + echo -e "${normal}\t400\t${nid}" >> ${pair_id}.pindel.config + fi + pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP + pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.pindel.vcf.gz fi -#MERGE DELLY AND MAKE BED -bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf -perl $baseDir/vcf2bed.sv.pl delly.vcf | sort -V -k 1,1 -k 2,2n > delly.bed -bgzip delly.vcf -if [[ -n ${normal} ]] +if [[ $method == 'delly' ]] then - tabix delly.vcf.gz - bcftools view -O z -o ${pair_id}.delly.vcf.gz -s ${keepid} delly.vcf.gz -else - mv delly.vcf.gz ${pair_id}.delly.vcf.gz + module load delly2/v0.7.7-multi samtools/1.6 snpeff/4.3q + if [[ -n ${normal} ]] + then + #RUN DELLY + echo -e "${normal}\tcontrol"> samples.tsv + echo -e "${tumor}\ttumor" >> samples.tsv + delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} + delly2 filter -t BND -o delly_tra.bcf -f somatic -s samples.tsv delly_translocations.bcf + delly2 filter -t DUP -o delly_dup.bcf -f somatic -s samples.tsv delly_duplications.bcf + delly2 filter -t INV -o delly_inv.bcf -f somatic -s samples.tsv delly_inversions.bcf + delly2 filter -t DEL -o delly_del.bcf -f somatic -s samples.tsv delly_deletion.bcf + delly2 filter -t INS -o delly_ins.bcf -f somatic -s samples.tsv delly_insertion.bcf + else + #RUN DELLY + delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} + delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} + delly2 filter -t BND -o delly_tra.bcf -f germline delly_translocations.bcf + delly2 filter -t DUP -o delly_dup.bcf -f germline delly_duplications.bcf + delly2 filter -t INV -o delly_inv.bcf -f germline delly_inversions.bcf + delly2 filter -t DEL -o delly_del.bcf -f germline delly_deletion.bcf + delly2 filter -t INS -o delly_ins.bcf -f germline delly_insertion.bcf + fi + #MERGE DELLY AND MAKE BED + bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf + bgzip delly.vcf + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 delly.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.delly.vcf.gz fi -tabix ${pair_id}.delly.vcf.gz -#MAKE FILES FOR LUMPY -samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${sbam} -samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam -gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o splitters.bam - -gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o discordants.bam - -#RUN LUMPY -if [[ -n ${normal} ]] +if [[ $method == 'lumpy' ]] then - samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${normal} + #MAKE FILES FOR LUMPY + samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${sbam} samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam - gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o normal.splitters.bam - - gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o normal.discordants.bam - - speedseq sv -t $SLURM_CPUS_ON_NODE -o sssv -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed - java -jar $SNPEFF_HOME/SnpSift.jar filter -n "GEN[0].SU > 1" sssv.sv.vcf.gz |bgzip > tumor.sssv.sv.vcf.gz - tabix tumor.sssv.sv.vcf.gz - bcftools view -O z -o ${pair_id}.sssv.sv.vcf.gz -s ${keepid} tumor.sssv.sv.vcf.gz -else - speedseq sv -t $SLURM_CPUS_ON_NODE -o ${pair_id}.sssv -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed + gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o splitters.bam - + gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o discordants.bam - + #RUN LUMPY + if [[ -n ${normal} ]] + then + samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${normal} + samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam + gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o normal.splitters.bam - + gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o normal.discordants.bam - + speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed + else + speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed + fi + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.lumpy.vcf.gz fi - -java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 10" ${pair_id}.sssv.sv.vcf.gz > lumpy.vcf -perl $baseDir/vcf2bed.sv.pl lumpy.vcf > lumpy.bed - -#COMPARE DELLY & LUMPY -#if [[ -n ${normal} ]] -#then - #bedtools multiinter -cluster -header -names novobreak delly lumpy -i novobreak.bed delly.bed lumpy.bed > sv.intersect.bed - #zcat ${pair_id}.novobreak.vcf.gz | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz - #grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${pair_id}.novobreak.vcf.gz | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz -#else -#fi - -bedtools multiinter -cluster -header -names delly lumpy -i delly.bed lumpy.bed > sv.intersect.bed -grep delly sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v 'start' |grep -v 'novobreak' | bedtools intersect -header -b stdin -a ${pair_id}.delly.vcf.gz |bgzip > svt2.vcf.gz -grep lumpy sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v 'start' |grep -v 'delly' |grep -v 'novobreak' | bedtools intersect -header -b stdin -a ${pair_id}.sssv.sv.vcf.gz |bgzip > svt3.vcf.gz - -vcf-concat svt*.vcf.gz | vcf-sort -t temp > ${pair_id}.sv.vcf -java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf > ${pair_id}.sv.annot.vcf - -perl $baseDir/svannot.pl -i ${pair_id}.sv.annot.vcf -bgzip ${pair_id}.sv.annot.vcf -tabix ${pair_id}.sv.annot.vcf.gz -- GitLab