Skip to content
Snippets Groups Projects
Commit aed49914 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

update cnv, mutect2

parent 4db3ad56
No related merge requests found
...@@ -11,7 +11,7 @@ usage() { ...@@ -11,7 +11,7 @@ usage() {
exit 1 exit 1
} }
OPTIND=1 # Reset OPTIND OPTIND=1 # Reset OPTIND
while getopts :b:p:n:f:t:c:uh opt while getopts :b:p:n:f:t:c:uqh opt
do do
case $opt in case $opt in
b) sbam=$OPTARG;; b) sbam=$OPTARG;;
...@@ -21,6 +21,7 @@ do ...@@ -21,6 +21,7 @@ do
t) targets=$OPTARG;; t) targets=$OPTARG;;
c) capture=$OPTARG;; c) capture=$OPTARG;;
u) umi='umi';; u) umi='umi';;
q) idtsnp=1;;
h) usage;; h) usage;;
esac esac
done done
...@@ -54,15 +55,24 @@ source /etc/profile.d/modules.sh ...@@ -54,15 +55,24 @@ source /etc/profile.d/modules.sh
module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8
unset DISPLAY unset DISPLAY
#samtools index ${sbam} if [[ $idtsnp == 1 ]]
#bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/NGSCheckMate.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf then
samtools index ${sbam}
bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/clinseq_prj/IDT_snps.120bp.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf
fi
cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr
cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
#cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.ballelecall.cns if [[ $idtsnp == 1 ]]
then
cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.call.cns
else
cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
fi
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf
cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed
perl $baseDir/filter_cnvkit.pl ${pair_id}.call.cns perl $baseDir/filter_cnvkit.pl ${pair_id}.call.cns
...@@ -11,13 +11,14 @@ usage() { ...@@ -11,13 +11,14 @@ usage() {
exit 1 exit 1
} }
OPTIND=1 # Reset OPTIND OPTIND=1 # Reset OPTIND
while getopts :r:a:b:p:th opt while getopts :r:a:b:q:p:th opt
do do
case $opt in case $opt in
r) index_path=$OPTARG;; r) index_path=$OPTARG;;
p) pair_id=$OPTARG;; p) pair_id=$OPTARG;;
a) algo=$OPTARG;; a) algo=$OPTARG;;
t) rna=1;; t) rna=1;;
q) pon==$OPTARG;;
h) usage;; h) usage;;
esac esac
done done
...@@ -82,11 +83,19 @@ then ...@@ -82,11 +83,19 @@ then
done done
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf"
vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz -
elif [[ $algo == 'platypus' ]]
then
module load platypus/gcc/0.8.1
bamlist=`join_by , *.bam`
Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
tabix platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
elif [[ $algo == 'gatk' ]] elif [[ $algo == 'gatk' ]]
then then
gatk4_dbsnp=/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/dbSnp.gatk4.vcf.gz gatk4_dbsnp=/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/dbSnp.gatk4.vcf.gz
user=$USER user=$USER
module load gatk/4.1.2.0 module load gatk/4.1.4.0
gvcflist='' gvcflist=''
for i in *.bam; do for i in *.bam; do
prefix="${i%.bam}" prefix="${i%.bam}"
...@@ -101,14 +110,18 @@ then ...@@ -101,14 +110,18 @@ then
gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf
bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz
tabix ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz
elif [[ $algo == 'platypus' ]] elif [ $algo == 'mutect2' ]
then then
module load platypus/gcc/0.8.1 gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
bamlist=`join_by , *.bam` module load gatk/4.1.4.0
Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf for i in *.bam; do
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz prefix="${i%.bam}"
tabix platypus.vcf.gz echo ${prefix}
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${i} O=artifact_metrics.txt R=${reffa}
gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${i} --output ${prefix}.mutect.vcf
gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${prefix}.mutect.vcf -O ${prefix}.mutect.filt.vcf
vcf-sort ${prefix}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${prefix}.mutect.vcf.gz
done
elif [[ $algo == 'strelka2' ]] elif [[ $algo == 'strelka2' ]]
then then
if [[ $rna == 1 ]] if [[ $rna == 1 ]]
......
...@@ -20,7 +20,7 @@ usage(){ ...@@ -20,7 +20,7 @@ usage(){
} }
OPTIND=1 # Reset OPTIND OPTIND=1 # Reset OPTIND
while getopts :n:t:p:r:x:y:i:j:a:b:h opt while getopts :n:t:p:r:x:y:i:j:q:a:b:h opt
do do
case $opt in case $opt in
r) index_path=$OPTARG;; r) index_path=$OPTARG;;
...@@ -33,6 +33,7 @@ do ...@@ -33,6 +33,7 @@ do
j) mtumor=$OPTARG;; j) mtumor=$OPTARG;;
a) algo=$OPTARG;; a) algo=$OPTARG;;
b) tbed=$OPTARG;; b) tbed=$OPTARG;;
q) pon==$OPTARG;;
h) usage;; h) usage;;
esac esac
done done
...@@ -54,6 +55,12 @@ then ...@@ -54,6 +55,12 @@ then
mtumor=tumor mtumor=tumor
mnormal=normal mnormal=normal
fi fi
if [[ -z $pon ]]
then
ponopt='';
else
ponopt="--pon $pon"
fi
if [[ -a "${index_path}/genome.fa" ]] if [[ -a "${index_path}/genome.fa" ]]
then then
...@@ -105,14 +112,10 @@ if [ $algo == 'virmid' ] ...@@ -105,14 +112,10 @@ if [ $algo == 'virmid' ]
elif [ $algo == 'mutect2' ] elif [ $algo == 'mutect2' ]
then then
gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
user=$USER module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
module load gatk/4.x singularity/2.6.1 picard/2.10.3
mkdir /tmp/${user}
export TMP_HOME=/tmp/${user}
java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa}
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" Mutect2 -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [ $algo == 'varscan' ] elif [ $algo == 'varscan' ]
then then
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment