From 8712a6ed5b521712832e7b562302d66543f6cf7e Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Wed, 12 Aug 2020 11:23:07 -0500 Subject: [PATCH] fb parallel options for BioHPC/Cloud --- alignment/rnaseqalign.sh | 53 ++++++++++++++++++++++++++++++---------- variants/germline_vc.sh | 7 +++--- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index a46a1f7..1e590c9 100755 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -29,12 +29,16 @@ done shift $(($OPTIND -1)) # Check for mandatory options -if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then +if [[ -z $pair_id ]] +then usage fi -source /etc/profile.d/modules.sh -module load samtools/1.6 picard/2.10.3 +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh + module load samtools/1.6 picard/2.10.3 +fi baseDir="`dirname \"$0\"`" NPROC=$SLURM_CPUS_ON_NODE if [[ -z $NPROC ]] @@ -42,28 +46,51 @@ then NPROC=`nproc` fi +fqs='' +i=0 +numfq=${#fqs[@]} +while [[ $i -le $numfq ]] +do + fqs="$fqs $1" + i=$((i + 1)) + shift 1 +done +hisat_opt='' diff $fq1 $fq2 > difffile +if [[ -f $fq1 ]] +then + fqs="$fq1" +fi +if [[ -f $fq2 ]] && [[ -s difffile ]] +then + fqs+=" $fq2" +fi +numfq=${#fqs[@]} + +star_opt=$fqs +fqarray=($fqs) +if [[ $numfq == 1 ]] +then + hisat_opt="-1 ${fqarray[0]} -2 ${fqarray[1]}" +else + hisat_opt="-U $fqarray[0]" +fi if [ $algo == 'star' ] then - if [ -s difffile ] + if [[ -z $isdocker ]] then module load star/2.4.2a - STAR --genomeDir ${index_path}/star_index/ --readFilesIn $fq1 $fq2 --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out - else - module load star/2.4.2a - STAR --genomeDir ${index_path}/star_index/ --readFilesIn $fq1 --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out fi + STAR --genomeDir ${index_path}/star_index/ --readFilesIn $star_opt --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out mv outLog.final.out ${pair_id}.alignerout.txt mv outAligned.sortedByCoord.out.bam ${pair_id}.bam else - module load hisat2/2.1.0-intel - if [ -s difffile ] + if [[ -z $isdocker ]] then - hisat2 -p $NPROC --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt - else - hisat2 -p $NPROC --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt + module load hisat2/2.1.0-intel fi + hisat2 -p $NPROC --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome $hisat_opt -S out.sam --summary-file ${pair_id}.alignerout.txt if [[ $umi == 1 ]] then python ${baseDir}/add_umi_sam.py -s out.sam -o output.bam diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index ab1e772..59c5d05 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -86,14 +86,15 @@ then if [[ -z $isdocker ]] then module load freebayes/gcc/1.2.0 parallel/20150122 + paropt="--delay 2 -j $NPROC" + else + paropt="--delay 1 --jobs 0 --memfree 2G" fi bamlist='' for i in *.bam; do bamlist="$bamlist --bam ${PWD}/${i}" done - #--memfree 2G for DNANexus - - cut -f 1 $fbsplit | parallel --delay 1 --jobs 0 "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" + cut -f 1 $fbsplit | parallel ${paropt} "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.fb.vcf.gz - elif [[ $algo == 'platypus' ]] then -- GitLab