From aed499148f69fb5c56706df79d4da9637a92bd8e Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Mon, 2 Dec 2019 13:11:54 -0600 Subject: [PATCH] update cnv, mutect2 --- variants/cnvkit.sh | 20 +++++++++++++++----- variants/germline_vc.sh | 31 ++++++++++++++++++++++--------- variants/somatic_vc.sh | 19 +++++++++++-------- 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 1031ec3..b4a3eff 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -11,7 +11,7 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :b:p:n:f:t:c:uh opt +while getopts :b:p:n:f:t:c:uqh opt do case $opt in b) sbam=$OPTARG;; @@ -21,6 +21,7 @@ do t) targets=$OPTARG;; c) capture=$OPTARG;; u) umi='umi';; + q) idtsnp=1;; h) usage;; esac done @@ -54,15 +55,24 @@ source /etc/profile.d/modules.sh module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 unset DISPLAY -#samtools index ${sbam} -#bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/NGSCheckMate.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf +if [[ $idtsnp == 1 ]] +then + samtools index ${sbam} + bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/clinseq_prj/IDT_snps.120bp.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf +fi cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns -cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns -#cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.ballelecall.cns + +if [[ $idtsnp == 1 ]] +then + cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.call.cns +else + cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns +fi + cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed perl $baseDir/filter_cnvkit.pl ${pair_id}.call.cns diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 2ddc6f7..62006d2 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -11,13 +11,14 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:a:b:p:th opt +while getopts :r:a:b:q:p:th opt do case $opt in r) index_path=$OPTARG;; p) pair_id=$OPTARG;; a) algo=$OPTARG;; t) rna=1;; + q) pon==$OPTARG;; h) usage;; esac done @@ -82,11 +83,19 @@ then done cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - +elif [[ $algo == 'platypus' ]] +then + module load platypus/gcc/0.8.1 + bamlist=`join_by , *.bam` + Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf + vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz + tabix platypus.vcf.gz + bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz elif [[ $algo == 'gatk' ]] then gatk4_dbsnp=/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/dbSnp.gatk4.vcf.gz user=$USER - module load gatk/4.1.2.0 + module load gatk/4.1.4.0 gvcflist='' for i in *.bam; do prefix="${i%.bam}" @@ -101,14 +110,18 @@ then gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz -elif [[ $algo == 'platypus' ]] +elif [ $algo == 'mutect2' ] then - module load platypus/gcc/0.8.1 - bamlist=`join_by , *.bam` - Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf - vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz - tabix platypus.vcf.gz - bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + module load gatk/4.1.4.0 + for i in *.bam; do + prefix="${i%.bam}" + echo ${prefix} + java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${i} O=artifact_metrics.txt R=${reffa} + gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${i} --output ${prefix}.mutect.vcf + gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${prefix}.mutect.vcf -O ${prefix}.mutect.filt.vcf + vcf-sort ${prefix}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${prefix}.mutect.vcf.gz + done elif [[ $algo == 'strelka2' ]] then if [[ $rna == 1 ]] diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 8613f4c..e6d6c90 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -20,7 +20,7 @@ usage(){ } OPTIND=1 # Reset OPTIND -while getopts :n:t:p:r:x:y:i:j:a:b:h opt +while getopts :n:t:p:r:x:y:i:j:q:a:b:h opt do case $opt in r) index_path=$OPTARG;; @@ -33,6 +33,7 @@ do j) mtumor=$OPTARG;; a) algo=$OPTARG;; b) tbed=$OPTARG;; + q) pon==$OPTARG;; h) usage;; esac done @@ -54,6 +55,12 @@ then mtumor=tumor mnormal=normal fi +if [[ -z $pon ]] +then + ponopt=''; +else + ponopt="--pon $pon" +fi if [[ -a "${index_path}/genome.fa" ]] then @@ -105,14 +112,10 @@ if [ $algo == 'virmid' ] elif [ $algo == 'mutect2' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz - user=$USER - module load gatk/4.x singularity/2.6.1 picard/2.10.3 - mkdir /tmp/${user} - export TMP_HOME=/tmp/${user} + module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" Mutect2 -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf - module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 + gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf + gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then -- GitLab