diff --git a/alignment/bam2tdf.sh b/alignment/bam2tdf.sh index db7faefcdd1b5a3bd900a0759961730302503a9a..e6299d58137337fa2ded981c5239809c2ecf8e43 100644 --- a/alignment/bam2tdf.sh +++ b/alignment/bam2tdf.sh @@ -23,13 +23,15 @@ shift $(($OPTIND -1)) # Check for mandatory options -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi + baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh module load igvtools/2.3.71 samtools/1.6 -samtools index -@ $SLURM_CPUS_ON_NODE $bam +samtools index -@ $NPROC $bam igvtools count -z 5 $bam ${pair_id}.tdf ${index_path}/igv/human.genome diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index bfc8c54e77a13893ebcc8b2440bafac09ae821ac..466d01c0fe62ec2f08551756f1de351cc25b6347 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -40,28 +40,29 @@ samtools flagstat ${sbam} > ${pair_id}.flagstat.txt fastqc -f bam ${sbam} baseDir="`dirname \"$0\"`" -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ $dedup == 1 ]] then mv $sbam ori.bam - samtools view -@ $SLURM_CPUS_ON_NODE -F 1024 -b -o ${sbam} ori.bam + samtools view -@ $NPROC -F 1024 -b -o ${sbam} ori.bam fi if [[ $nuctype == 'dna' ]]; then module load bedtools/2.26.0 picard/2.10.3 if [[ -z $skiplc ]] then - samtools view -@ $SLURM_CPUS_ON_NODE -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} - samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.ontarget.bam + samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} + samtools index -@ $NPROC ${pair_id}.ontarget.bam samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt java -Xmx64g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt - java -Xmx64g -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt - samtools view -@ $SLURM_CPUS_ON_NODE -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt - samtools view -@ $SLURM_CPUS_ON_NODE ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt + java -Xmx64g -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt + samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt + samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt fi java -Xmx64g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index cac762d61e7117fc2070f796d0e8adc4b85eadcb..184c787ba5e30d6a2983857fd7d84d26c78ebd95 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -33,9 +33,10 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -z $read_group ]] @@ -61,7 +62,7 @@ else file_opt="${fq1}" fi -bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa $file_opt > out.sam +bwa mem -M -t $NPROC -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa $file_opt > out.sam if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]] then @@ -77,6 +78,6 @@ else fi which samtools -samtools sort -n --threads $SLURM_CPUS_ON_NODE -o output.dups.bam output.unsort.bam +samtools sort -n --threads $NPROC -o output.dups.bam output.unsort.bam java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam samtools index ${pair_id}.bam diff --git a/alignment/hisat_genotype.sh b/alignment/hisat_genotype.sh index 7e0655ecd393c73483cb63201d266c3c554b687c..4ee37c672dea6910e9f0a8e9b8516e55ed4d35b6 100644 --- a/alignment/hisat_genotype.sh +++ b/alignment/hisat_genotype.sh @@ -29,9 +29,10 @@ if [[ -z $pair_id ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi source /etc/profile.d/modules.sh diff --git a/alignment/indexbams.sh b/alignment/indexbams.sh index 38238fb1b337c8051e225e9e0f410ccd3fe2bd8a..2af125dd027eac1ad1ae2cefd1f776e0a9bc17a5 100644 --- a/alignment/indexbams.sh +++ b/alignment/indexbams.sh @@ -18,14 +18,16 @@ shift $(($OPTIND -1)) # Check for mandatory options -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi + baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh module load samtools/1.6 for i in *.bam; do - samtools index -@ $SLURM_CPUS_ON_NODE ${i} + samtools index -@ $NPROC ${i} done diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 91bcc0a0693d36175d27cde1020742c788fb93e7..ca36c409a959dcdb49a100f271125aeb698d2792 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -28,10 +28,12 @@ if [[ -z $pair_id ]] || [[ -z $sbam ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi + if [[ -z $index_path ]] then index_path="/project/shared/bicf_workflow_ref/human/grch38_cloud/dnaref" @@ -46,39 +48,39 @@ module load picard/2.10.3 if [ $algo == 'sambamba' ] then module load speedseq/20160506 - sambamba markdup -t $SLURM_CPUS_ON_NODE ${sbam} ${pair_id}.dedup.bam + sambamba markdup -t $NPROC ${sbam} ${pair_id}.dedup.bam touch ${pair_id}.dedup.stat.txt elif [ $algo == 'samtools' ] then module load samtools/gcc/1.8 - samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam + samtools markdup -s --output-fmt BAM -@ $NPROC sort.bam ${pair_id}.dedup.bam touch ${pair_id}.dedup.stat.txt elif [ $algo == 'picard' ] then - java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt + java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt elif [ $algo == 'picard_umi' ] then - java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt + java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt elif [ $algo == 'fgbio_umi' ] then module load fgbio bwakit/0.7.15 bwa/intel/0.7.17 samtools/gcc/1.8 - samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} + samtools index -@ $NPROC ${sbam} fgbio --tmp-dir ./ GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0 fgbio --tmp-dir ./ CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' samtools index ${pair_id}.consensus.bam samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R2.fastq - bwa mem -M -C -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz > out.sam + bwa mem -M -C -t $NPROC -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz > out.sam if [[ ${index_path}/genome.fa.alt ]] then k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | samtools view -1 - > ${pair_id}.consensus.bam else samtools view -1 out.sam > ${pair_id}.consensus.bam fi - samtools sort --threads $SLURM_CPUS_ON_NODE -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam + samtools sort --threads $NPROC -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam else cp ${sbam} ${pair_id}.dedup.bam fi module load samtools/gcc/1.8 -samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.dedup.bam +samtools index -@ $NPROC ${pair_id}.dedup.bam diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index d044bda3ea5646968bb06609ec54eb3296816f61..a46a1f75af784aa1d6ef7df386b087e98fe5d39f 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -36,9 +36,10 @@ fi source /etc/profile.d/modules.sh module load samtools/1.6 picard/2.10.3 baseDir="`dirname \"$0\"`" -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi diff $fq1 $fq2 > difffile @@ -59,17 +60,17 @@ else module load hisat2/2.1.0-intel if [ -s difffile ] then - hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt + hisat2 -p $NPROC --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt else - hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt + hisat2 -p $NPROC --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt fi if [[ $umi == 1 ]] then python ${baseDir}/add_umi_sam.py -s out.sam -o output.bam else - samtools view -1 --threads $SLURM_CPUS_ON_NODE -o output.bam out.sam + samtools view -1 --threads $NPROC -o output.bam out.sam fi - samtools sort -@ $SLURM_CPUS_ON_NODE -O BAM -o ${pair_id}.bam output.bam + samtools sort -@ $NPROC -O BAM -o ${pair_id}.bam output.bam fi -samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam +samtools index -@ $NPROC ${pair_id}.bam diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 81a58775d29d860f623557b5eb1c9321f423cf9e..a9f984c59e288df27ea45277fb2c4848449564c5 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -31,9 +31,10 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi baseDir="`dirname \"$0\"`" @@ -50,7 +51,7 @@ then fi export TMP_HOME=$tmphome refgeno=${index_path}/CTAT_lib_trinity1.6 - trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${refgeno} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion + trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $NPROC --genome_lib_dir ${refgeno} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt else module add star/2.5.2b diff --git a/genect_rnaseq/geneabundance.sh b/genect_rnaseq/geneabundance.sh index 39aad284f18c21a6c93f1e7c08ac479e893d1c74..5ad0e357a6330121875588edcc7bab863f98e561 100644 --- a/genect_rnaseq/geneabundance.sh +++ b/genect_rnaseq/geneabundance.sh @@ -29,20 +29,21 @@ if [[ -z $pair_id ]] || [[ -z $sbam ]] then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi -if [[ $SLURM_CPUS_ON_NODE > 64 ]] +if [[ $NPROC > 64 ]] then - SLURM_CPUS_ON_NODE=64 + NPROC=64 fi source /etc/profile.d/modules.sh module load subread/1.6.1 export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH -featureCounts -s $stranded -M --fraction -J --ignoreDup -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam} +featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam} mkdir ${pair_id}_stringtie cd ${pair_id}_stringtie -stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt +stringtie ../${sbam} -p $NPROC -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt diff --git a/genect_rnaseq/statanal.sh b/genect_rnaseq/statanal.sh index ad5cab388e0f1edbe81cb8af30c4515dc6b6d14a..9cba1e37976c079754bd830ebf1b38acf12dec03 100644 --- a/genect_rnaseq/statanal.sh +++ b/genect_rnaseq/statanal.sh @@ -20,9 +20,10 @@ shift $(($OPTIND -1)) baseDir="`dirname \"$0\"`" # Check for mandatory options -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi source /etc/profile.d/modules.sh diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 23a40e557b0971b737e0173456425824515fff13..a728ad9cc23a8ad0d5015a0d2da36d82b2e6e7c5 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -35,9 +35,10 @@ if [[ -z $pair_id ]] || [[ -z $sbam ]] then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -z $paneldir ]] then diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index 41436e5685ac938f62ec9a4698973a3b1b99f708..d3dd05f3990d9984bce5d86554010b415c873c39 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -30,9 +30,10 @@ then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -a "${index_path}/dbSnp.gatk4.vcf.gz" ]] then @@ -59,7 +60,7 @@ fi source /etc/profile.d/modules.sh module load gatk/4.1.2.0 samtools/gcc/1.8 which samtools -/cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} +/cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $NPROC ${sbam} if [[ $algo == 'gatkbam_rna' ]] then @@ -67,21 +68,21 @@ then java -Xmx4g -jar $PICARD/picard.jar CleanSam INPUT=${sbam} OUTPUT=${pair_id}.clean.bam java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} - samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam + samtools index -@ $NPROC ${pair_id}.rg_added_sorted.bam gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table - /cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam + /cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $NPROC ${pair_id}.final.bam elif [[ $algo == 'gatkbam' ]] then gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table - /cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam + /cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $NPROC ${pair_id}.final.bam elif [[ $algo == 'abra2' ]] then module load abra2/2.18 mkdir tmpdir - java -Xmx16G -jar /cm/shared/apps/abra2/lib/abra2.jar --in ${sbam} --in-vcf /archive/PHG/PHG_Clinical/phg_workflow/analysis/awesomeproject/GoldIndels.vcf --out ${pair_id}.final.bam --ref ${reffa} --threads $SLURM_CPUS_ON_NODE --tmpdir tmpdir - samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam + java -Xmx16G -jar /cm/shared/apps/abra2/lib/abra2.jar --in ${sbam} --in-vcf /archive/PHG/PHG_Clinical/phg_workflow/analysis/awesomeproject/GoldIndels.vcf --out ${pair_id}.final.bam --ref ${reffa} --threads $NPROC --tmpdir tmpdir + samtools index -@ $NPROC ${pair_id}.final.bam fi diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 018f765406f4ba2e7fe5653f805807794452a7df..b8699b0272243106981dc2093a514fd64a7b8b9e 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -30,9 +30,10 @@ shift $(($OPTIND -1)) if [[ -z $pair_id ]] || [[ -z $index_path ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -s "${index_path}/dbSnp.vcf.gz" ]] then @@ -70,13 +71,13 @@ module load python/2.7.x-anaconda picard/2.10.3 samtools/gcc/1.8 bcftools/gcc/1. for i in *.bam; do if [[ ! -f ${i}.bai ]] then - samtools index -@ $SLURM_CPUS_ON_NODE $i + samtools index -@ $NPROC $i fi done if [[ $algo == 'mpileup' ]] then - threads=`expr $SLURM_CPUS_ON_NODE - 10` + threads=`expr $NPROC - 10` bcftools mpileup --threads $threads -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -A -d 1000000 -C50 -f ${reffa} *.bam | bcftools call -A --threads 10 -vmO z -o ${pair_id}.vcf.gz vcf-annotate -n --fill-type ${pair_id}.vcf.gz | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf java -jar $PICARD/picard.jar SortVcf I=sam.vcf O=${pair_id}.sam.vcf R=${reffa} CREATE_INDEX=TRUE @@ -88,13 +89,13 @@ then for i in *.bam; do bamlist="$bamlist --bam ${PWD}/${i}" done - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" + cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $NPROC "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - elif [[ $algo == 'platypus' ]] then module load platypus/gcc/0.8.1 bamlist=`join_by , *.bam` - Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf + Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$NPROC --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz tabix platypus.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz @@ -143,8 +144,8 @@ then gvcflist="$gvcflist --bam ${i}" done configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta - manta/runWorkflow.py -m local -j $SLURM_CPUS_ON_NODE + manta/runWorkflow.py -m local -j $NPROC configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} $mode --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka - strelka/runWorkflow.py -m local -j $SLURM_CPUS_ON_NODE + strelka/runWorkflow.py -m local -j $NPROC bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz fi diff --git a/variants/itdseek.sh b/variants/itdseek.sh index 0ea6ac83510bc896c371ce12edf884a78b48cf37..3af0ea721cdd5485f5a5cc8718cc04a300ec90c9 100755 --- a/variants/itdseek.sh +++ b/variants/itdseek.sh @@ -31,9 +31,10 @@ baseDir="`dirname \"$0\"`" if [[ -z $pair_id ]] || [[ -z $index_path ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -z $snpeffgeno ]] then @@ -56,7 +57,7 @@ source /etc/profile.d/modules.sh module load samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 htslib/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 stexe=`which samtools` -samtools view -@ $SLURM_CPUS_ON_NODE -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz +samtools view -@ $NPROC -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz bcftools norm --fasta-ref $reffa -c w -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config $snpeffgeno - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz diff --git a/variants/pindel.sh b/variants/pindel.sh index 294d7cd57175d1792a23be3bdc6eabd7bb524275..ebbd3fe316bf36bf42c4aa6ecb3795ea397d9dcb 100755 --- a/variants/pindel.sh +++ b/variants/pindel.sh @@ -29,9 +29,10 @@ baseDir="`dirname \"$0\"`" if [[ -z $pair_id ]] || [[ -z $index_path ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -a "${index_path}/genome.fa" ]] @@ -59,7 +60,7 @@ for i in *.bam; do echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config done -pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP +pindel -T $NPROC -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz tabix pindel.vcf.gz diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index d21321a0852fe3b9d0535fdc41273ea58aa67b85..159351e4fb5f53c3b0f2966cbde1614954870320 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -45,10 +45,10 @@ if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then echo $normal $tumor $algo usage fi - -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi #pair_id=${tid}_${nid} if [[ -z $mtumor ]] @@ -104,7 +104,7 @@ fi if [ $algo == 'virmid' ] then module load virmid/1.2 samtools/gcc/1.8 vcftools/0.1.14 - virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $SLURM_CPUS_ON_NODE -M 2000 -c1 10 -c2 10 + virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $NPROC -M 2000 -c1 10 -c2 10 perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf module rm java/oracle/jdk1.7.0_51 @@ -114,7 +114,7 @@ elif [ $algo == 'mutect2' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 - java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} + java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz diff --git a/variants/svcalling.sh b/variants/svcalling.sh index eedc0c9b8f13289abe96c7c3dac3a9f3495024f9..d193da7c81973b6e5b754f246774e97e88a002ef 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -36,9 +36,10 @@ baseDir="`dirname \"$0\"`" if [[ -z $pair_id ]] || [[ -z $index_path ]]; then usage fi -if [[ -z $SLURM_CPUS_ON_NODE ]] +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] then - SLURM_CPUS_ON_NODE=1 + NPROC=`nproc` fi if [[ -a "${index_path}/genome.fa" ]] @@ -99,9 +100,9 @@ if [[ $method == 'svaba' ]] then if [[ -n ${normal} ]] then - /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $SLURM_CPUS_ON_NODE -G ${reffa} -t ${sbam} -n ${normal} -a ${pair_id} + /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $NPROC -G ${reffa} -t ${sbam} -n ${normal} -a ${pair_id} else - /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $SLURM_CPUS_ON_NODE -G ${reffa} -t ${sbam} -a ${pair_id} + /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $NPROC -G ${reffa} -t ${sbam} -a ${pair_id} fi java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.svaba.unfiltered.somatic.sv.vcf | bgzip > ${pair_id}.svaba.vcf.gz fi @@ -109,20 +110,20 @@ fi if [[ $method == 'lumpy' ]] then #MAKE FILES FOR LUMPY - samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${sbam} + samtools sort -@ $NPROC -n -o namesort.bam ${sbam} samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o splitters.bam - gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o discordants.bam - #RUN LUMPY if [[ -n ${normal} ]] then - samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${normal} + samtools sort -@ $NPROC -n -o namesort.bam ${normal} samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o normal.splitters.bam - gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o normal.discordants.bam - - speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed + speedseq sv -t $NPROC -o lumpy -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed else - speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed + speedseq sv -t $NPROC -o lumpy -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed fi java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.lumpy.vcf.gz fi @@ -134,9 +135,9 @@ then for i in *.bam; do sname=`echo "$i" |cut -f 1 -d '.'` echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config - samtools index -@ $SLURM_CPUS_ON_NODE $i + samtools index -@ $NPROC $i done - pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP + pindel -T $NPROC -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz tabix pindel.vcf.gz @@ -149,7 +150,7 @@ fi if [[ $method == 'itdseek' ]] then stexe=`which samtools` - samtools view -@ $SLURM_CPUS_ON_NODE -L ${bed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz + samtools view -@ $NPROC -L ${bed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz diff --git a/variants/union.sh b/variants/union.sh index 6c2edeefa94f7bfe8fa4e69a4d41f77dfc76926a..5dd626ce70070000a2e75e83380b5288a1aa110d 100755 --- a/variants/union.sh +++ b/variants/union.sh @@ -41,7 +41,7 @@ for i in ${dir}/*.vcf.gz; do if [[ $i == $HS ]] then bcftools norm -m - -O z -o hotspot.norm.vcf.gz $i - java -jar /cm/shared/apps/snpeff/4.3q/SnpSift.jar filter "(GEN[*].AD[1] > 3)" hotspot.norm.vcf.gz |bgzip > hotspot.lowfilt.vcf.gz + java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].AD[1] > 3)" hotspot.norm.vcf.gz |bgzip > hotspot.lowfilt.vcf.gz bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a hotspot.lowfilt.vcf.gz -b stdin |bgzip > nooverlap.hotspot.vcf.gz list2="$list2 nooverlap.hotspot.vcf.gz" fi