diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 95749523810fb659597c5d91d6a6cb4e7151e83d..9a5eb6b8eae39f6f838c813debf33425989aef1a 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -67,4 +67,4 @@ then else cp ${sbam} ${pair_id}.dedup.bam fi -samtools index --threads $SLURM_CPUS_ON_NODE ${pair_id}.dedup.bam +samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.dedup.bam diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 3e1ec53835531d75db5169237a59e0b74b178f63..dd962339fcf8f21615803c93a27b81b7fa80c9ec 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -42,7 +42,7 @@ module add python/2.7.x-anaconda star/2.5.2b bedtools/2.26.0 if [[ $method == 'trinity' ]] then - module load trinity/1.4.0 + module load trinity/1.6.0 tmphome="/tmp/$USER" if [[ -z $tmphome ]] then diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 743167f9393019ef33b91a60e4c6d6d76c5c84bb..4d23d7c5067c8607b9a8a75b9974d129f076dad9 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -38,7 +38,7 @@ then fi baseDir="`dirname \"$0\"`" -if [[ $capture == '/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/UTSWV2.bed' ]] +if [[ $capture == '${index_path}/UTSWV2.bed' ]] then normals="${index_path}/UTSWV2.normals.cnn" targets="${index_path}/UTSWV2.cnvkit_" @@ -46,17 +46,21 @@ then then normals="${index_path}/UTSWV2.uminormals.cnn" fi -elif [[ $capture == '/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/UTSWV2_2.panelplus.bed' ]] +elif [[ $capture == '${index_path}/UTSWV2_2.panelplus.bed' ]] then normals="${index_path}/panelofnormals.panel1385V2_2.cnn" targets="${index_path}/panel1385V2-2.cnvkit_" +elif [[ $capture == '${index_path}/hemepanelV3.bed' ]] +then + normals="${index_path}/hemepanelV3.flatreference.cnn" + targets="${index_path}/hemepanelV3.cnvkit_" fi echo "${targets}targets.bed" echo "${targets}antitargets.bed" source /etc/profile.d/modules.sh -module load cnvkit/0.9.0 bedtools/2.26.0 +module load cnvkit/0.9.5 bedtools/2.26.0 unset DISPLAY cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index b4ddaccdd975d24fe37ca84d41dcac47b2e7e476..99a55e0c351a443e3da334fcf54890fed3dbac74 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -68,6 +68,9 @@ while (my $line = <CNR>) { $genes{$value} = 1 if $keep{$value}; } } + }elsif ($geneids =~ m/:/) { + my ($gene,$chr,$pos) = split(/:/,$geneids); + $genes{$gene} = 1; }else { my @ids = split(/,/,$geneids); foreach my $gid (@ids) { @@ -97,6 +100,9 @@ while (my $line = <IN>) { $genes{$value} = 1 if $keep{$value}; } } + }elsif ($geneids =~ m/:/) { + my ($gene,$chr,$pos) = split(/:/,$geneids); + $genes{$gene} = 1; }else { my @ids = split(/,/,$geneids); foreach my $gid (@ids) { diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index aaead372e7bb71d5fd39ba2c3f06f127c8c99186..366ee3a8b7471cd79d7f23b0407d5ddbd53aaaab 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -70,17 +70,15 @@ then java -Xmx4g -jar $PICARD/picard.jar CleanSam INPUT=${sbam} OUTPUT=${pair_id}.clean.bam java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} - samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.clean.bam - java -Xmx4g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T SplitNCigarReads -R ${reffa} -I ${pair_id}.sort.bam -o ${pair_id}.split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS - java -Xmx32g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${pair_id}.split.bam -nt 8 -nct 1 - java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam - java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8 - java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8 - + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'gatkbam' ]] then - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${i} --known-sites ${gatk4_dbsnp} -R ${reffa} -O ${prefix}.recal_data.table --use-original-qualities - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${i} -R ${reffa} -O ${prefix}.final.bam --use-original-qualities -bqsr ${prefix}.recal_data.table + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'abra2' ]] diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 2e0144357e56761578676d15a97f1b4641868c99..8b7ee199221d35222d255037a9862d3475959a2d 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -73,17 +73,15 @@ then vcf-annotate -n --fill-type ${pair_id}.vcf.gz | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf java -jar $PICARD/picard.jar SortVcf I=sam.vcf O=${pair_id}.sam.vcf R=${reffa} CREATE_INDEX=TRUE bgzip ${pair_id}.sam.vcf - elif [[ $algo == 'freebayes' ]] then - module load freebayes/gcc/1.2.0 + module load freebayes/gcc/1.2.0 parallel/20150122 bamlist='' for i in *.bam; do - bamlist="$bamlist --bam ${i}" + bamlist="$bamlist --bam ${PWD}/${i}" done - freebayes-parallel ${index_path}/genomefile.5M.txt $SLURM_CPUS_ON_NODE -f ${reffa} --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 --vcf fb.vcf $bamlist - vcf-annotate -n --fill-type fb.vcf| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - - + cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" + vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - elif [[ $algo == 'gatk' ]] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz @@ -103,7 +101,6 @@ then singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenotypeGVCFs $gvcflist -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz - elif [[ $algo == 'platypus' ]] then module load platypus/gcc/0.8.1 @@ -112,7 +109,6 @@ then vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz tabix platypus.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz - elif [[ $algo == 'strelka2' ]] then if [[ $rna == 1 ]] diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 76eb6e21ffc8b6a54897486967e0974949d13aee..8613f4cc8488f1289407247dfc135841536ee8f1 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -81,7 +81,7 @@ fi baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh - +module load htslib/gcc/1.8 if [ $algo == 'strelka2' ] then @@ -102,26 +102,21 @@ if [ $algo == 'virmid' ] module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz -fi - -if [ $algo == 'mutect2' ] +elif [ $algo == 'mutect2' ] then - gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz - user=$USER - module load gatk/4.x singularity/2.6.1 picard/2.10.3 - mkdir /tmp/${user} - export TMP_HOME=/tmp/${user} - java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics -I ${tumor} -O=artifact_metrics.txt -R ${reffa} - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" MuTect2 -R ${reffa} -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A StrandArtifact -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -A -TandemRepeat I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --cosmic ${cosmic} -o ${tid}.mutect.vcf - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterByOrientationBias --artifactModes 'G/T' -V ${tid}.mutect.filt.vcf -P artifact_metrics.txt --output ${tid}.mutect.filt2.vcf - module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 - vcf-concat ${tid}.mutect.filt2.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz -fi - -if [ $algo == 'varscan' ] + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + user=$USER + module load gatk/4.x singularity/2.6.1 picard/2.10.3 + mkdir /tmp/${user} + export TMP_HOME=/tmp/${user} + java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" Mutect2 -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf + singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + module load snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 + vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz +elif [ $algo == 'varscan' ] then - module load samtools/1.6 VarScan/2.4.2 vcftools/0.1.14 + module load samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14 module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup @@ -129,11 +124,9 @@ then VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1 VarScan copynumber n.mpileup t.mpileup vscancnv vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz -fi - -if [ $algo == 'shimmer' ] +elif [ $algo == 'shimmer' ] then - module load shimmer/0.1.1 samtools/1.6 vcftools/0.1.14 + module load shimmer/0.1.1 samtools/gcc/1.8 vcftools/0.1.14 shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err perl $baseDir/add_readct_shimmer.pl module rm java/oracle/jdk1.7.0_51 diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index 30fc05271d7085cadb8fa4c970cb920c0eb5457e..8d2f450720a28b123fffa3f44fa0ecfb307fce08 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -137,21 +137,26 @@ foreach $vcf (@vcffiles) { } close VCF; } -my @callers = ('fb','gatk','platypus','mutect','strelka2','shimmer','strelka2','virmid','strelka2',); +my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); F1:foreach $chr (sort {$a cmp $b} keys %lines) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) { F4:foreach $alt (sort {$a <=> $b} keys %{$lines{$chr}{$pos}}) { my @callset; my %csets; + my %depth; F3:foreach $caller (sort {$a cmp $b} keys %{$lines{$chr}{$pos}{$alt}}) { my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot, $format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$alt}{$caller}}; - @gtdesc = @{$gtdescref}; + my @gtdesc = @{$gtdescref}; + my @gtdesc2; foreach $gtd (@gtdesc) { my ($id,$dp,$maf) = split(/:/,$gtd); + push @gtdesc2, $dp; push @{$csets{$id}}, [$caller,$dp,$maf]; } + @gtdesc2 = sort {$b <=> $a} @gtdesc2; + $depth{$caller} = $gtdesc2[0]; push @callset, join("|",$caller,$alt,@gtdesc); } my $consistent = 1; @@ -160,6 +165,7 @@ F1:foreach $chr (sort {$a cmp $b} keys %lines) { my @calls = sort {$a[2] <=> $b[2]} @calls; $consistent = 0 if ($calls[0][2] < 0.25 && $calls[-1][2] - $calls[0][2] > 0.10 && $calls[-1][2]/($calls[0][2]+0.001) > 3); } + @callorder = sort {$depth{$b} <=> $depth{$a}} keys %depth; F3:foreach $caller (@callers) { if ($lines{$chr}{$pos}{$alt}{$caller}) { my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot,