diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index 9ae193d5c623bbd06626a463e5ae33904c53eaa6..7b9fca4142c1c7125508f1299db82bcfdc36293b 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -12,7 +12,7 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:x:y:g:p:uh opt +while getopts :r:x:y:g:p:a:uh opt do case $opt in r) index_path=$OPTARG;; @@ -21,6 +21,7 @@ do u) umi='umi';; g) read_group=$OPTARG;; p) pair_id=$OPTARG;; + a) aligner=$OPTARG;; h) usage;; esac done @@ -42,26 +43,35 @@ then read_group=$pair_id fi +testexe='/project/shared/bicf_workflow_ref/seqprg/bin' + source /etc/profile.d/modules.sh -module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 picard/2.10.3 +module load bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 baseDir="`dirname \"$0\"`" diff $fq1 $fq2 > difffile - -if [ -s difffile ] +if [[ $aligner == 'bwa' ]] then - bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam -else - bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} > out.sam + module load bwa/intel/0.7.17 + if [ -s difffile ] + then + bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam + else + bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} > out.sam + fi +elif [[ $aligner == 'hisat2' ]] +then + module load hisat2/2.1.0-intel + hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal -x ${index_path}/hisat_index/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt fi if [[ $umi == 'umi' ]] && [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] then - k8 /cm/shared/apps/bwakit/0.7.15/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam elif [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] then - k8 /cm/shared/apps/bwakit/0.7.15/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam elif [[ $umi == 'umi' ]] then python ${baseDir}/add_umi_sam.py -s out.sam -o output.unsort.bam diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 55fc2acc6aa03f853a53b16dd8c37fdc16275f5e..5687a5d19c43811861e59b7b0c3552121046df23 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -34,7 +34,7 @@ fi baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh -module load picard/2.10.3 samtools/1.6 +module load picard/2.10.3 samtools/gcc/1.8 if [ $algo == 'sambamba' ] then @@ -42,10 +42,6 @@ then sambamba markdup -t $SLURM_CPUS_ON_NODE ${sbam} ${pair_id}.dedup.bam elif [ $algo == 'samtools' ] then - module load samtools/1.6 - samtools sort -n -@ $SLURM_CPUS_ON_NODE -o nsort.bam ${sbam} - samtools fixmate -c --output-fmt BAM -m -@ $SLURM_CPUS_ON_NODE nsort.bam fix.bam - samtools sort -n -@ $SLURM_CPUS_ON_NODE -o sort.bam fix.bam samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam elif [ $algo == 'picard' ] then @@ -68,3 +64,4 @@ then else cp ${sbam} ${pair_id}.dedup.bam fi +samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.dedup.bam diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 3e1ec53835531d75db5169237a59e0b74b178f63..dd962339fcf8f21615803c93a27b81b7fa80c9ec 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -42,7 +42,7 @@ module add python/2.7.x-anaconda star/2.5.2b bedtools/2.26.0 if [[ $method == 'trinity' ]] then - module load trinity/1.4.0 + module load trinity/1.6.0 tmphome="/tmp/$USER" if [[ -z $tmphome ]] then diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 743167f9393019ef33b91a60e4c6d6d76c5c84bb..4d23d7c5067c8607b9a8a75b9974d129f076dad9 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -38,7 +38,7 @@ then fi baseDir="`dirname \"$0\"`" -if [[ $capture == '/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/UTSWV2.bed' ]] +if [[ $capture == '${index_path}/UTSWV2.bed' ]] then normals="${index_path}/UTSWV2.normals.cnn" targets="${index_path}/UTSWV2.cnvkit_" @@ -46,17 +46,21 @@ then then normals="${index_path}/UTSWV2.uminormals.cnn" fi -elif [[ $capture == '/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/UTSWV2_2.panelplus.bed' ]] +elif [[ $capture == '${index_path}/UTSWV2_2.panelplus.bed' ]] then normals="${index_path}/panelofnormals.panel1385V2_2.cnn" targets="${index_path}/panel1385V2-2.cnvkit_" +elif [[ $capture == '${index_path}/hemepanelV3.bed' ]] +then + normals="${index_path}/hemepanelV3.flatreference.cnn" + targets="${index_path}/hemepanelV3.cnvkit_" fi echo "${targets}targets.bed" echo "${targets}antitargets.bed" source /etc/profile.d/modules.sh -module load cnvkit/0.9.0 bedtools/2.26.0 +module load cnvkit/0.9.5 bedtools/2.26.0 unset DISPLAY cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index b4ddaccdd975d24fe37ca84d41dcac47b2e7e476..e86ee768a88b5633cddc4ee05653e490b030df12 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -36,7 +36,8 @@ while (my $line = <CYTO>) { my ($chrom,$start,$end,$band) = split(/\t/,$line); my $key = $chrom.":".$start."-".$end; $band =~ m/^(\w)(.+)/; - push @{$cyto{$key}}, [$1,$2]; + next unless ($1 && $2); + push @{$cyto{$key}{$1}},$2; } open OUT, ">$prefix\.cnvcalls.txt" or die $!; @@ -68,6 +69,9 @@ while (my $line = <CNR>) { $genes{$value} = 1 if $keep{$value}; } } + }elsif ($geneids =~ m/:/) { + my ($gene,$chr,$pos) = split(/:/,$geneids); + $genes{$gene} = 1; }else { my @ids = split(/,/,$geneids); foreach my $gid (@ids) { @@ -97,6 +101,9 @@ while (my $line = <IN>) { $genes{$value} = 1 if $keep{$value}; } } + }elsif ($geneids =~ m/:/) { + my ($gene,$chr,$pos) = split(/:/,$geneids); + $genes{$gene} = 1; }else { my @ids = split(/,/,$geneids); foreach my $gid (@ids) { @@ -115,16 +122,23 @@ while (my $line = <IN>) { $cn_cbio = 2 if ($cn > 4); print BIO join("\t",$gene,$entrez{$gene},$cn_cbio),"\n"; print BIO2 join("\t",$gene,$entrez{$gene},$log2),"\n"; - my @cytoband = sort {$a->[1] <=>$b->[1]} @{$cyto{$key}}; - if (join("",@{$cytoband[0]}) eq join("",@{$cytoband[-1]})) { - $cband = join("",@{$cytoband[0]}); + my @cytoband; + if (@{$cyto{$key}{'p'}}) { + @nums = sort {$b <=> $a} @{$cyto{$key}{'p'}}; + push @cytoband, 'p'.$nums[0],'p'.$nums[-1]; + } if (@{$cyto{$key}{'q'}}) { + @nums = sort {$a <=> $b} @{$cyto{$key}{'q'}}; + push @cytoband, 'q'.$nums[0],'q'.$nums[-1]; + } + if ($cytoband[0] eq $cytoband[-1]) { + $cband = $cytoband[0]; }else { - $cband = join("",@{$cytoband[0]},'-',@{$cytoband[-1]}); + $cband = join("-",$cytoband[0],$cytoband[-1]); } - print OUT2 join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight,$cband),"\n"; - print OUT join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight),"\n"; - } -} + print OUT2 join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight,$cband),"\n"; + print OUT join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight),"\n"; + } + } close IN; close OUT; close BIO; diff --git a/variants/filter_pindel.pl b/variants/filter_pindel.pl index c8769e72e9680e222b95e90d5bbeff7e6e59d861..7e56a75aaf2ddd188b4b7f0a50831f39b449424c 100644 --- a/variants/filter_pindel.pl +++ b/variants/filter_pindel.pl @@ -80,6 +80,7 @@ foreach $file (@files) { next if ($tumoraltct[0] eq '.'); $hash{AF} = join(",",@tumormaf); next if ($tumoraltct[0] < 20); + next if ($tumormaf[0] < 0.01); my $keepforvcf = 0; foreach $trx (split(/,/,$hash{ANN})) { my ($allele,$effect,$impact,$gene,$geneid,$feature, diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index 4a610ad37f2df7246ecfb9210a3f20442e0355fe..f8ba7b6fa732a699e2f126a8f21111bd79e7fce0 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -57,7 +57,11 @@ else fi source /etc/profile.d/modules.sh -module load gatk/3.8 samtools/1.6 +user=$USER +module load gatk/4.x singularity/2.6.1 samtools/gcc/1.8 +mkdir /tmp/${user} +export TMP_HOME=/tmp/${user} + samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} if [[ $algo == 'gatkbam_rna' ]] @@ -66,17 +70,21 @@ then java -Xmx4g -jar $PICARD/picard.jar CleanSam INPUT=${sbam} OUTPUT=${pair_id}.clean.bam java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} - samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.clean.bam - java -Xmx4g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T SplitNCigarReads -R ${reffa} -I ${pair_id}.sort.bam -o ${pair_id}.split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS - java -Xmx32g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${pair_id}.split.bam -nt 8 -nct 1 - java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam - java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8 - java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8 + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'gatkbam' ]] then - samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} - java -Xmx16g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt 8 -nct 1 - java -Xmx16g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam - java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8 - java -Xmx16g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8 + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam + +elif [[ $algo == 'abra2' ]] +then + module load abra2/2.18 + mkdir tmpdir + java -Xmx16G -jar /cm/shared/apps/abra2/lib/abra2.jar --in ${sbam} --in-vcf /archive/PHG/PHG_Clinical/phg_workflow/analysis/awesomeproject/GoldIndels.vcf --out ${pair_id}.final.bam --ref ${reffa} --threads $SLURM_CPUS_ON_NODE --tmpdir tmpdir + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam fi diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index c6d47da0d45555026ecd2123de887dee84af600d..d16aad9d43774cd7933c334190fced2dbb39a719 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -57,7 +57,7 @@ else fi source /etc/profile.d/modules.sh -module load python/2.7.x-anaconda picard/2.10.3 samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel +module load python/2.7.x-anaconda picard/2.10.3 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel for i in *.bam; do if [[ ! -f ${i}.bai ]] @@ -68,40 +68,44 @@ done if [[ $algo == 'mpileup' ]] then - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "samtools mpileup -t 'AD,DP,INFO/AD' -ug -Q20 -C50 -r {} -f ${reffa} *.bam | bcftools call -vmO z -o ${pair_id}.samchr.{}.vcf.gz" - vcf-concat ${pair_id}.samchr.*.vcf.gz |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf - + threads=`expr $SLURM_CPUS_ON_NODE - 10` + bcftools mpileup --threads $threads -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} *.bam | bcftools call --threads 10 -vmO z -o ${pair_id}.vcf.gz + vcf-annotate -n --fill-type ${pair_id}.vcf.gz | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf java -jar $PICARD/picard.jar SortVcf I=sam.vcf O=${pair_id}.sam.vcf R=${reffa} CREATE_INDEX=TRUE bgzip ${pair_id}.sam.vcf -elif [[ $algo == 'hotspot' ]] +elif [[ $algo == 'freebayes' ]] then - samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi - bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz - - tabix ${pair_id}.lowfreq.vcf.gz - bcftools annotate -Ov -a ${index_path}/oncokb_hotspot.txt.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - | grep '#\|CNT\|OncoKBHotspot' | bgzip > ${pair_id}.hotspot.vcf.gz -elif [[ $algo == 'speedseq' ]] -then - module load speedseq/gcc/0.1.2 - speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam - vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz - + module load freebayes/gcc/1.2.0 parallel/20150122 + bamlist='' + for i in *.bam; do + bamlist="$bamlist --bam ${PWD}/${i}" + done + cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" + vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - elif [[ $algo == 'gatk' ]] then - module load gatk/3.8 + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + user=$USER + module load gatk/4.1.2.0 gvcflist='' for i in *.bam; do - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I $i -o ${i}.{}.chr.gatk.g.vcf -nct 2 -L {}" - vcf-concat ${i}.*.chr.gatk.g.vcf |vcf-sort > gatk.g.vcf - rm ${i}.*.chr.gatk.g.vcf - java -jar $PICARD/picard.jar SortVcf I=gatk.g.vcf O=${i}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE - gvcflist="$gvcflist --variant ${i}.gatk.g.vcf" + prefix="${i%.bam}" + echo ${prefix} + gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -O haplotypecaller.vcf.gz + java -jar $PICARD/picard.jar SortVcf I=haplotypecaller.vcf.gz O=${prefix}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE + + gvcflist="$gvcflist -V ${prefix}.gatk.g.vcf" done - java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T GenotypeGVCFs --disable_auto_index_creation_and_locking_when_reading_rods -o gatk.vcf $gvcflist + interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` + gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb -L $interval + gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz elif [[ $algo == 'platypus' ]] then module load platypus/gcc/0.8.1 bamlist=`join_by , *.bam` - Platypus.py callVariants --minMapQual=10 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf + Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz tabix platypus.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz @@ -113,7 +117,7 @@ then else mode="--exome" fi - module load strelka/2.9.0 samtools/1.6 manta/1.3.1 snpeff/4.3q vcftools/0.1.14 + module load strelka/2.9.10 manta/1.3.1 mkdir manta strelka gvcflist='' for i in *.bam; do diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 4a5ef6e9b1515748381ddceab7c6f54dfcff9441..8613f4cc8488f1289407247dfc135841536ee8f1 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -81,13 +81,13 @@ fi baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh - +module load htslib/gcc/1.8 if [ $algo == 'strelka2' ] then - module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14 + module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 mkdir manta strelka - configManta.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --runDir manta + configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta manta/runWorkflow.py -m local -j 8 configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka strelka/runWorkflow.py -m local -j 8 @@ -95,37 +95,28 @@ if [ $algo == 'strelka2' ] fi if [ $algo == 'virmid' ] then - module load virmid/1.2 samtools/1.6 vcftools/0.1.14 + module load virmid/1.2 samtools/gcc/1.8 vcftools/0.1.14 virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $SLURM_CPUS_ON_NODE -M 2000 -c1 10 -c2 10 perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz -fi - -if [ $algo == 'speedseq' ] - then - module load snpeff/4.3q speedseq/20160506 samtools/1.6 vcftools/0.1.14 - speedseq somatic -q 10 -t $SLURM_CPUS_ON_NODE -o sssom ${reffa} ${normal} ${tumor} - vcf-annotate -H -n --fill-type sssom.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.sssom.vcf.gz -fi - -if [ $algo == 'mutect2' ] +elif [ $algo == 'mutect2' ] then - module load parallel gatk/3.8 snpeff/4.3q samtools/1.6 vcftools/0.1.14 - if [ -z ${tbed} ] - then - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" - else - awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" - fi - vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz -fi - -if [ $algo == 'varscan' ] + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + user=$USER + module load gatk/4.x singularity/2.6.1 picard/2.10.3 + mkdir /tmp/${user} + export TMP_HOME=/tmp/${user} + java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" Mutect2 -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 + vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz +elif [ $algo == 'varscan' ] then - module load samtools/1.6 VarScan/2.4.2 vcftools/0.1.14 + module load samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14 module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup @@ -133,11 +124,9 @@ then VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1 VarScan copynumber n.mpileup t.mpileup vscancnv vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz -fi - -if [ $algo == 'shimmer' ] +elif [ $algo == 'shimmer' ] then - module load shimmer/0.1.1 samtools/1.6 vcftools/0.1.14 + module load shimmer/0.1.1 samtools/gcc/1.8 vcftools/0.1.14 shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err perl $baseDir/add_readct_shimmer.pl module rm java/oracle/jdk1.7.0_51 @@ -145,9 +134,4 @@ then vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz fi -if [ $algo == 'lancet' ] -then - module load snpeff/4.3q lancet samtools/1.6 vcftools/0.1.14 - lancet --tumor ${tumor} --normal ${normal} --ref $reffa -B $tbed --num-threads $SLURM_CPUS_ON_NODE > lancet.vcf - vcf-sort lancet.vcf | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') | (FILTER = 'LowVafTumor') | (FILTER = 'LowVafTumor;LowAltCntTumor')) & (GEN[*].DP >= 10)" |bgzip > ${pair_id}.lancet.vcf.gz -fi + diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index 1b5ff5d202c6bd49568e6d47aa8e05b4f0d6e6c5..37452dc15b6e80403590c96f9e73995eaa509c14 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -133,48 +133,54 @@ foreach $vcf (@vcffiles) { }else { $filter = '.'; } - $lines{$chrom}{$pos}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc]; + $lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc]; } close VCF; } -my @callers = ('ssvar','sam','gatk','strelka2','platypus','hotspot'); -if (grep(/mutect/,@vcffiles)) { - @callers = ('sssom','mutect','shimmer','strelka2','varscan','virmid'); -} +my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); + F1:foreach $chr (sort {$a cmp $b} keys %lines) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) { - F4:foreach $alt (sort {$a <=> $b} keys %{$lines{$chr}{$pos}}) { - my @callset; - my %csets; - F3:foreach $caller (sort {$a cmp $b} keys %{$lines{$chr}{$pos}{$alt}}) { - my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot, - $format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$alt}{$caller}}; - @gtdesc = @{$gtdescref}; - foreach $gtd (@gtdesc) { - my ($id,$dp,$maf) = split(/:/,$gtd); - push @{$csets{$id}}, [$caller,$dp,$maf]; - } - push @callset, join("|",$caller,$alt,@gtdesc); - } - my $consistent = 1; - foreach $id (keys %csets) { - my @calls = @{$csets{$id}}; - my @calls = sort {$a[2] <=> $b[2]} @calls; - $consistent = 0 if ($calls[0][2] < 0.25 && $calls[-1][2] - $calls[0][2] > 0.10 && $calls[-1][2]/($calls[0][2]+0.001) > 3); - } - F3:foreach $caller (@callers) { - if ($lines{$chr}{$pos}{$alt}{$caller}) { + F5:foreach $ref (sort {$a <=> $b} keys %{$lines{$chr}{$pos}}) { + F4:foreach $alt (sort {$a <=> $b} keys %{$lines{$chr}{$pos}{$ref}}) { + my @callset; + my %csets; + my %depth; + F3:foreach $caller (sort {$a cmp $b} keys %{$lines{$chr}{$pos}{$ref}{$alt}}) { my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot, - $format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$alt}{$caller}}; - @gts = @{$gtsref}; - @gtdesc = @{$gtdescref}; - $annot = $annot.";CallSet=".join(",",@callset); - unless ($consistent) { - $annot = $annot.";CallSetInconsistent=1"; + $format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$ref}{$alt}{$caller}}; + my @gtdesc = @{$gtdescref}; + my @gtdesc2; + foreach $gtd (@gtdesc) { + my ($id,$dp,$maf) = split(/:/,$gtd); + push @gtdesc2, $dp; + push @{$csets{$id}}, [$caller,$dp,$maf]; + } + @gtdesc2 = sort {$b <=> $a} @gtdesc2; + $depth{$caller} = $gtdesc2[0]; + push @callset, join("|",$caller,$alt,@gtdesc); + } + my $consistent = 1; + foreach $id (keys %csets) { + my @calls = @{$csets{$id}}; + my @calls = sort {$a[2] <=> $b[2]} @calls; + $consistent = 0 if ($calls[0][2] < 0.25 && $calls[-1][2] - $calls[0][2] > 0.10 && $calls[-1][2]/($calls[0][2]+0.001) > 3); + } + @callorder = sort {$depth{$b} <=> $depth{$a}} keys %depth; + F3:foreach $caller (@callers) { + if ($lines{$chr}{$pos}{$ref}{$alt}{$caller}) { + my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot, + $format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$ref}{$alt}{$caller}}; + @gts = @{$gtsref}; + @gtdesc = @{$gtdescref}; + $annot = $annot.";CallSet=".join(",",@callset); + unless ($consistent) { + $annot = $annot.";CallSetInconsistent=1"; + } + print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score, + $filter,$annot,$format,@gts),"\n"; + last F3; } - print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score, - $filter,$annot,$format,@gts),"\n"; - last F3; } } }