diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index 9ae193d5c623bbd06626a463e5ae33904c53eaa6..7b9fca4142c1c7125508f1299db82bcfdc36293b 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -12,7 +12,7 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:x:y:g:p:uh opt +while getopts :r:x:y:g:p:a:uh opt do case $opt in r) index_path=$OPTARG;; @@ -21,6 +21,7 @@ do u) umi='umi';; g) read_group=$OPTARG;; p) pair_id=$OPTARG;; + a) aligner=$OPTARG;; h) usage;; esac done @@ -42,26 +43,35 @@ then read_group=$pair_id fi +testexe='/project/shared/bicf_workflow_ref/seqprg/bin' + source /etc/profile.d/modules.sh -module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 picard/2.10.3 +module load bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 baseDir="`dirname \"$0\"`" diff $fq1 $fq2 > difffile - -if [ -s difffile ] +if [[ $aligner == 'bwa' ]] then - bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam -else - bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} > out.sam + module load bwa/intel/0.7.17 + if [ -s difffile ] + then + bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam + else + bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} > out.sam + fi +elif [[ $aligner == 'hisat2' ]] +then + module load hisat2/2.1.0-intel + hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal -x ${index_path}/hisat_index/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt fi if [[ $umi == 'umi' ]] && [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] then - k8 /cm/shared/apps/bwakit/0.7.15/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam elif [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] then - k8 /cm/shared/apps/bwakit/0.7.15/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam elif [[ $umi == 'umi' ]] then python ${baseDir}/add_umi_sam.py -s out.sam -o output.unsort.bam diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 55fc2acc6aa03f853a53b16dd8c37fdc16275f5e..95749523810fb659597c5d91d6a6cb4e7151e83d 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -34,7 +34,7 @@ fi baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh -module load picard/2.10.3 samtools/1.6 +module load picard/2.10.3 samtools/gcc/1.8 if [ $algo == 'sambamba' ] then @@ -42,7 +42,6 @@ then sambamba markdup -t $SLURM_CPUS_ON_NODE ${sbam} ${pair_id}.dedup.bam elif [ $algo == 'samtools' ] then - module load samtools/1.6 samtools sort -n -@ $SLURM_CPUS_ON_NODE -o nsort.bam ${sbam} samtools fixmate -c --output-fmt BAM -m -@ $SLURM_CPUS_ON_NODE nsort.bam fix.bam samtools sort -n -@ $SLURM_CPUS_ON_NODE -o sort.bam fix.bam @@ -68,3 +67,4 @@ then else cp ${sbam} ${pair_id}.dedup.bam fi +samtools index --threads $SLURM_CPUS_ON_NODE ${pair_id}.dedup.bam diff --git a/variants/filter_pindel.pl b/variants/filter_pindel.pl index efc9a5cd79daea2082d07e3d011622096c562ba8..07ee4000a36277a4e7f48e2d2e64bd56fa5f8578 100644 --- a/variants/filter_pindel.pl +++ b/variants/filter_pindel.pl @@ -80,7 +80,7 @@ foreach $file (@files) { next if ($tumoraltct[0] eq '.'); $hash{AF} = join(",",@tumormaf); next if ($tumoraltct[0] < 20); - next if ($tumormaf[0] < 0.05); + next if ($tumormaf[0] < 0.01); my $keepforvcf = 0; foreach $trx (split(/,/,$hash{ANN})) { my ($allele,$effect,$impact,$gene,$geneid,$feature, diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index 4a610ad37f2df7246ecfb9210a3f20442e0355fe..aaead372e7bb71d5fd39ba2c3f06f127c8c99186 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -57,7 +57,11 @@ else fi source /etc/profile.d/modules.sh -module load gatk/3.8 samtools/1.6 +user=$USER +module load gatk/4.x singularity/2.6.1 +mkdir /tmp/${user} +export TMP_HOME=/tmp/${user} + samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} if [[ $algo == 'gatkbam_rna' ]] @@ -72,11 +76,17 @@ then java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8 java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8 + elif [[ $algo == 'gatkbam' ]] then - samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} - java -Xmx16g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt 8 -nct 1 - java -Xmx16g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam - java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8 - java -Xmx16g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8 + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${i} --known-sites ${gatk4_dbsnp} -R ${reffa} -O ${prefix}.recal_data.table --use-original-qualities + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${i} -R ${reffa} -O ${prefix}.final.bam --use-original-qualities -bqsr ${prefix}.recal_data.table + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam + +elif [[ $algo == 'abra2' ]] +then + module load abra2/2.18 + mkdir tmpdir + java -Xmx16G -jar /cm/shared/apps/abra2/lib/abra2.jar --in ${sbam} --in-vcf /archive/PHG/PHG_Clinical/phg_workflow/analysis/awesomeproject/GoldIndels.vcf --out ${pair_id}.final.bam --ref ${reffa} --threads $SLURM_CPUS_ON_NODE --tmpdir tmpdir + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam fi diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 73e0740050cd97bdab0d2be44c77be0dcd604393..2e0144357e56761578676d15a97f1b4641868c99 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -57,7 +57,7 @@ else fi source /etc/profile.d/modules.sh -module load python/2.7.x-anaconda picard/2.10.3 samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel +module load python/2.7.x-anaconda picard/2.10.3 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel for i in *.bam; do if [[ ! -f ${i}.bai ]] @@ -68,43 +68,51 @@ done if [[ $algo == 'mpileup' ]] then - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "samtools mpileup -t 'AD,DP,INFO/AD' -ug -Q20 -C50 -r {} -f ${reffa} *.bam | bcftools call -vmO z -o ${pair_id}.samchr.{}.vcf.gz" - vcf-concat ${pair_id}.samchr.*.vcf.gz |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf - + threads=`expr $SLURM_CPUS_ON_NODE - 10` + bcftools mpileup --threads $threads -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} *.bam | bcftools call --threads 10 -vmO z -o ${pair_id}.vcf.gz + vcf-annotate -n --fill-type ${pair_id}.vcf.gz | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf java -jar $PICARD/picard.jar SortVcf I=sam.vcf O=${pair_id}.sam.vcf R=${reffa} CREATE_INDEX=TRUE bgzip ${pair_id}.sam.vcf -elif [[ $algo == 'hotspot' ]] + +elif [[ $algo == 'freebayes' ]] then - samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi - bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz - - tabix ${pair_id}.lowfreq.vcf.gz - bcftools annotate -Ov -a ${index_path}/oncokb_hotspot.txt.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - | grep '#\|CNT\|OncoKBHotspot' | bgzip > ${pair_id}.hotspot.vcf.gz -elif [[ $algo == 'speedseq' ]] -then - module load speedseq/gcc/0.1.2 - speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam - vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz - + module load freebayes/gcc/1.2.0 + bamlist='' + for i in *.bam; do + bamlist="$bamlist --bam ${i}" + done + freebayes-parallel ${index_path}/genomefile.5M.txt $SLURM_CPUS_ON_NODE -f ${reffa} --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 --vcf fb.vcf $bamlist + vcf-annotate -n --fill-type fb.vcf| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - + elif [[ $algo == 'gatk' ]] then - module load gatk/3.8 + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + user=$USER + module load gatk/4.x singularity/2.6.1 + mkdir /tmp/${user} + export TMP_HOME=/tmp/${user} gvcflist='' for i in *.bam; do - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I $i -o ${i}.{}.chr.gatk.g.vcf -nct 2 -L {}" - vcf-concat ${i}.*.chr.gatk.g.vcf |vcf-sort > gatk.g.vcf - rm ${i}.*.chr.gatk.g.vcf - java -jar $PICARD/picard.jar SortVcf I=gatk.g.vcf O=${i}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE - gvcflist="$gvcflist --variant ${i}.gatk.g.vcf" + prefix="${i%.bam}" + echo ${prefix} + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -O haplotypecaller.vcf.gz + java -jar $PICARD/picard.jar SortVcf I=haplotypecaller.vcf.gz O=${prefix}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE + + gvcflist="$gvcflist --variant ${prefix}.gatk.g.vcf" done - java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T GenotypeGVCFs --disable_auto_index_creation_and_locking_when_reading_rods -o gatk.vcf $gvcflist + singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenotypeGVCFs $gvcflist -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz + elif [[ $algo == 'platypus' ]] then module load platypus/gcc/0.8.1 bamlist=`join_by , *.bam` - Platypus.py callVariants --minMapQual=10 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf + Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz tabix platypus.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz + elif [[ $algo == 'strelka2' ]] then if [[ $rna == 1 ]] @@ -113,7 +121,7 @@ then else mode="--exome" fi - module load strelka/2.9.0 samtools/1.6 manta/1.3.1 snpeff/4.3q vcftools/0.1.14 + module load strelka/2.9.10 manta/1.3.1 mkdir manta strelka gvcflist='' for i in *.bam; do @@ -125,18 +133,3 @@ then strelka/runWorkflow.py -m local -j 8 bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz fi -elif [[ $algo == 'gatk4' ]] -then - gatk4_dbsnp=/project/PHG/PHG_Clinical/devel/phg_workflow/_reference_files/dbSnp.vcf.gz - user=$USER - module load gatk/4.x samtools/1.6 - mkdir /tmp/${user} - export TMP_HOME=/tmp/${user} - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${bam} --known-sites ${gatk4_dbsnp} -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities - #singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${bam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table --static-quantized-quals 10 --static-quantize - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${bam} -R ${reffa} -O ${pair_id}.final.gatk4.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table - chrinterval=${ref}/genomefile.chr.txt - cut -f 1 ${chrinterval} | parallel --delay 2 -j 2 ' singularity exec -H /tmp/'${user}' /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" HaplotypeCaller -R '${reffa}' -I '${pair_id}'.final.gatk4.bam -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance --emit-ref-confidence GVCF -O '${pair_id}'.{}.vcf.gz -L {}' - vcf-concat ${pair_id}.*.vcf.gz | vcf-sort > haplotypecaller.vcf - java -jar $PICARD/picard.jar SortVcf I=haplotypecaller.vcf O=${pair_id}.haplotypecaller.vcf R=${reffa} CREATE_INDEX=TRUE -fi diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 4a5ef6e9b1515748381ddceab7c6f54dfcff9441..76eb6e21ffc8b6a54897486967e0974949d13aee 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -85,9 +85,9 @@ source /etc/profile.d/modules.sh if [ $algo == 'strelka2' ] then - module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14 + module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 mkdir manta strelka - configManta.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --runDir manta + configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta manta/runWorkflow.py -m local -j 8 configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka strelka/runWorkflow.py -m local -j 8 @@ -95,7 +95,7 @@ if [ $algo == 'strelka2' ] fi if [ $algo == 'virmid' ] then - module load virmid/1.2 samtools/1.6 vcftools/0.1.14 + module load virmid/1.2 samtools/gcc/1.8 vcftools/0.1.14 virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $SLURM_CPUS_ON_NODE -M 2000 -c1 10 -c2 10 perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf @@ -104,23 +104,19 @@ if [ $algo == 'virmid' ] vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz fi -if [ $algo == 'speedseq' ] - then - module load snpeff/4.3q speedseq/20160506 samtools/1.6 vcftools/0.1.14 - speedseq somatic -q 10 -t $SLURM_CPUS_ON_NODE -o sssom ${reffa} ${normal} ${tumor} - vcf-annotate -H -n --fill-type sssom.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.sssom.vcf.gz -fi - if [ $algo == 'mutect2' ] then - module load parallel gatk/3.8 snpeff/4.3q samtools/1.6 vcftools/0.1.14 - if [ -z ${tbed} ] - then - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" - else - awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" - fi - vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + user=$USER + module load gatk/4.x singularity/2.6.1 picard/2.10.3 + mkdir /tmp/${user} + export TMP_HOME=/tmp/${user} + java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics -I ${tumor} -O=artifact_metrics.txt -R ${reffa} + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" MuTect2 -R ${reffa} -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A StrandArtifact -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -A -TandemRepeat I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --cosmic ${cosmic} -o ${tid}.mutect.vcf + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterByOrientationBias --artifactModes 'G/T' -V ${tid}.mutect.filt.vcf -P artifact_metrics.txt --output ${tid}.mutect.filt2.vcf + module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 + vcf-concat ${tid}.mutect.filt2.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz fi if [ $algo == 'varscan' ] @@ -145,9 +141,4 @@ then vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz fi -if [ $algo == 'lancet' ] -then - module load snpeff/4.3q lancet samtools/1.6 vcftools/0.1.14 - lancet --tumor ${tumor} --normal ${normal} --ref $reffa -B $tbed --num-threads $SLURM_CPUS_ON_NODE > lancet.vcf - vcf-sort lancet.vcf | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') | (FILTER = 'LowVafTumor') | (FILTER = 'LowVafTumor;LowAltCntTumor')) & (GEN[*].DP >= 10)" |bgzip > ${pair_id}.lancet.vcf.gz -fi + diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index 1b5ff5d202c6bd49568e6d47aa8e05b4f0d6e6c5..30fc05271d7085cadb8fa4c970cb920c0eb5457e 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -137,10 +137,8 @@ foreach $vcf (@vcffiles) { } close VCF; } -my @callers = ('ssvar','sam','gatk','strelka2','platypus','hotspot'); -if (grep(/mutect/,@vcffiles)) { - @callers = ('sssom','mutect','shimmer','strelka2','varscan','virmid'); -} +my @callers = ('fb','gatk','platypus','mutect','strelka2','shimmer','strelka2','virmid','strelka2',); + F1:foreach $chr (sort {$a cmp $b} keys %lines) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) { F4:foreach $alt (sort {$a <=> $b} keys %{$lines{$chr}{$pos}}) {