Summary of this patch (free text ahead of the first file header):

  nextflow.config
    * Replaces the flat process.* / trace.* assignments with scoped blocks.
    * Adds params.repoDir, per-process container selectors (withLabel / withName),
      and singularity, timeline, report, env and manifest scopes.
    * clusterOptions gains '--no-kill'; trace.enabled changes from false to true.

  workflow/main.nf
    * Introduces repoDir: workflow.projectDir unless params.repoDir is set.
    * Renames process 'align' to 'ralign' and adds a label to each process touched here.
    * Script paths switch from $baseDir to $repoDir.

diff --git a/nextflow.config b/nextflow.config
index 1a935aa991270f04f369e2b3d212498d2da076f4..a1537145796646923e024c2593e06415de915873 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,6 +1,62 @@
-process.executor='slurm'
-process.queue='128GB,256GB,256GBv1'
-process.clusterOptions = '--hold'
-trace.enabled = false
-trace.file = 'pipeline_trace.txt'
-trace.field = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime'
+params {
+  repoDir='/seqprg'
+}
+process {
+  executor = 'slurm'
+  clusterOptions = '--hold --no-kill'
+  queue = '128GB,256GB,256GBv1'
+  withLabel: trim {
+    container = 'goalconsortium/trim_galore:1.0.4'
+  }
+  withLabel: dnaalign {
+    container = 'goalconsortium/dna_alignment:1.0.4'
+  }
+  withLabel: profiling_qc {
+    container = 'goalconsortium/profiling_qc:1.0.4'
+  }
+  withName: starfusion {
+    container = 'goalconsortium/starfusion:1.0.4'
+  }
+  withName: ralign {
+    container = 'goalconsortium/rna_alignment:1.0.4'
+  }
+  withName: geneabund {
+    container = 'goalconsortium/rna_gene_abundance:1.0.4'
+  }
+}
+
+singularity {
+  enabled = true
+  runOptions='--no-home --cleanenv'
+  cacheDir = '/project/shared/bicf_workflow_ref/seqprg/singularity/'
+}
+
+trace {
+  enabled = true
+  file = 'pipeline_trace.txt'
+  field = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime'
+}
+
+timeline {
+  enabled = false
+  file = 'timeline.html'
+}
+
+report {
+  enabled = false
+  file = 'report.html'
+}
+
+env {
+  http_proxy = 'http://proxy.swmed.edu:3128'
+  https_proxy = 'http://proxy.swmed.edu:3128'
+  all_proxy = 'http://proxy.swmed.edu:3128'
+}
+
+manifest {
+  homePage = 'https://git.biohpc.swmed.edu/ngsclialab/school'
+  description = 'School is a collection of genomics analysis workflows that are used for detecting single nucleotide variants (SNVs), insertions/deletions (indels), copy number variants (CNVs) and translocations from RNA and DNA sequencing. These workflows have been validated in a CLIA laboratory at UTSW'
+  mainScript = 'rna.nf'
+  version = '1.0.0'
+  nextflowVersion = '>=0.31.0'
+}
diff --git a/workflow/main.nf b/workflow/main.nf
index e158cd4da0a28104dfcad436f80ddf3262a8657f..1f45983a82fd7f2527bc9bd0548a14f4d23ac598 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -23,6 +23,11 @@ indel="$params.genome/GoldIndels.vcf.gz"
 knownindel=file(indel)
 dbsnp=file(dbsnp)
 
+repoDir=workflow.projectDir
+if (params.repoDir) {
+  repoDir=params.repoDir
+}
+
 // params genome is the directory
 // base name for the index is always genome
 index_path = file(params.genome)
@@ -75,18 +80,20 @@ if( ! read ) { error "Didn't match any input files with entries in the design fi
 // Trim raw reads using trimgalore
 process trim {
   errorStrategy 'ignore'
+  label 'trim'
   input:
   set pair_id, file(fqs) from read
   output:
   set pair_id, file("${pair_id}.trim.R*.fastq.gz") into trimread
   script:
   """
-  bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -f -p ${pair_id} ${fqs}
+  bash $repoDir/process_scripts/preproc_fastq/trimgalore.sh -f -p ${pair_id} ${fqs}
   """
 }
 
-process align {
+process ralign {
   errorStrategy 'ignore'
+  label 'ralign'
   publishDir "$params.output", mode: 'copy'
   input:
   set pair_id, file(fqs) from trimread
@@ -96,12 +103,13 @@ process align {
   file("${pair_id}.alignerout.txt") into hsatout
   script:
   """
-  bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} ${fqs}
+  bash $repoDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} ${fqs}
   """
 }
 
 process alignqc {
   errorStrategy 'ignore'
+  label 'profiling_qc'
   publishDir "$params.output", mode: 'copy'
   input:
   set pair_id, file(bam) from aligned2
@@ -110,13 +118,14 @@ process alignqc {
   set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
   script:
   """
-  bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
+  bash $repoDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
   """
 }
 
 // Identify duplicate reads with Picard
 process markdups {
   publishDir "$params.output", mode: 'copy'
+  label 'dnaalign'
   input:
   set pair_id, file(sbam) from aligned
   output:
@@ -124,7 +133,7 @@ process markdups {
   set pair_id, file("${pair_id}.dedup.bam") into deduped2
   script:
   """
-  bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
+  bash $repoDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
   """
 }
 
@@ -132,6 +141,7 @@ process markdups {
 // Assemble transcripts with stringtie
 process geneabund {
   errorStrategy 'ignore'
+  label 'geneabund'
   publishDir "$params.output", mode: 'copy'
   input:
   set pair_id, file(sbam) from deduped1
@@ -142,7 +152,7 @@ process geneabund {
   file("${pair_id}.fpkm.txt") into fpkm
   script:
   """
-  bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
+  bash $repoDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
   """
 }
 