#!/usr/bin/env nextflow

// cellranger mkfastq workflow (Nextflow DSL1).
//
// Stages, in data-flow order:
//   checkDesignFile -> runs scripts/check_design.py on the design CSV
//   untarBCL        -> unpacks every input tarball matched by params.bcl
//   mkfastq         -> runs `cellranger mkfastq` once per unpacked BCL directory
//   countDesign     -> (only when exactly one tarball was supplied) runs
//                      scripts/countDesign.sh
//   fastqc          -> runs scripts/fastqc.sh on the FASTQs of each BCL run
//   versions        -> gathers tool versions / references as YAML
//   multiqc         -> aggregates everything into a single MultiQC report

// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf

// Define Input variables
params.name = "run"
params.bcl = "${baseDir}/../test_data/*.tar.gz"
params.designFile = "${baseDir}/../test_data/design.csv"
params.outDir = "${baseDir}/output"
params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
params.references = "${baseDir}/../docs/references.md"

// Define List of Files
// Fail early when the glob matches nothing, mirroring the design file check
// below; otherwise the run would finish without demultiplexing anything.
tarList = Channel
  .fromPath( params.bcl )
  .ifEmpty { exit 1, "no BCL tarballs found matching: ${params.bcl}" }

// Number of input tarballs; count() yields a single-value dataflow variable.
bclCount = Channel
  .fromPath( params.bcl )
  .count()

// Define regular variables
name = params.name
designLocation = Channel
  .fromPath(params.designFile)
  .ifEmpty { exit 1, "design file not found: ${params.designFile}" }
outDir = params.outDir
multiqcConf = params.multiqcConf
references = params.references


// Runs scripts/check_design.py against the design file. The process expects
// the script to leave `design.checked.csv` in the work directory; that file is
// emitted twice so that mkfastq and countDesign each get their own channel.
process checkDesignFile {
  tag "${name}"
  publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
  module 'python/3.6.1-2-anaconda'

  input:
  file designLocation

  output:
  file("design.checked.csv") into designPaths
  file("design.checked.csv") into designCount

  script:
  """
  hostname
  ulimit -a
  python3 ${baseDir}/scripts/check_design.py -d ${designLocation}
  """
}


// Unpacks one tarball per task via scripts/untarBCL.sh. Everything present in
// the work directory afterwards is emitted, one item at a time (mode flatten).
process untarBCL {
  tag "${tar}"
  publishDir "${outDir}/${task.process}", mode: 'copy'
  module 'pigz/2.4'

  input:
  file tar from tarList

  output:
  file("*") into bclPaths mode flatten

  script:
  """
  hostname
  ulimit -a
  bash ${baseDir}/scripts/untarBCL.sh -t ${tar}
  """
}


// Runs `cellranger mkfastq` once for each item collected from bclPaths, using
// the checked design file as the --csv argument. FASTQs are emitted twice
// (for fastqc and for countDesign), together with the Stats.json file and
// the run's base name.
process mkfastq {
  tag "${bcl.baseName}"
  queue '128GB,256GB,256GBv1,384GB'
  publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
  module 'cellranger/3.0.2:bcl2fastq/2.19.1'

  input:
  each bcl from bclPaths.collect()
  file design from designPaths

  output:
  file("**/outs/**/*.fastq.gz") into fastqPaths
  file("**/outs/**/*.fastq.gz") into cellrangerCount
  file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
  val "${bcl.baseName}" into bclName

  script:
  // The command is a single shell command; `\\` emits a literal backslash so
  // bash joins the continuation lines. \$SLURM_CPUS_ON_NODE is escaped so it is
  // expanded by the shell at run time rather than by Nextflow.
  """
  hostname
  ulimit -a
  cellranger mkfastq --id=${bcl.baseName} \\
    --run=${bcl} \\
    --csv=${design} \\
    -r \$SLURM_CPUS_ON_NODE \\
    -p \$SLURM_CPUS_ON_NODE \\
    -w \$SLURM_CPUS_ON_NODE
  """
}


// countDesign is only defined when exactly one tarball was supplied.
// `.val` blocks until count() has bound its result, so the comparison never
// sees an unbound (not yet computed) count.
if (bclCount.val == 1) {
  // Runs scripts/countDesign.sh with all FASTQs and the checked design staged
  // in the work directory; expects `Cellranger_Count_Design.csv` as its result.
  process countDesign {
    tag "${name}"
    publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'

    input:
    file fastqs from cellrangerCount.collect()
    file design from designCount

    output:
    file("Cellranger_Count_Design.csv") into CountDesign

    script:
    """
    bash ${baseDir}/scripts/countDesign.sh
    """
  }
}


// Runs scripts/fastqc.sh on the FASTQs emitted by one mkfastq task. Each
// FASTQ is first renamed with the BCL run name as a prefix.
// NOTE(review): the prefix presumably keeps reports from different runs
// distinct - confirm against fastqc.sh.
process fastqc {
  tag "${bclName}"
  queue 'super'
  publishDir "${outDir}/misc/${task.process}/${name}/${bclName}", mode: 'copy', pattern: "{*fastqc.zip}"
  module 'fastqc/0.11.5:parallel'

  input:
  file fastqPaths
  val bclName

  output:
  file("*fastqc.zip") into fqcPaths

  script:
  """
  hostname
  ulimit -a
  find *.fastq.gz -exec mv {} ${bclName}.{} \\;
  bash ${baseDir}/scripts/fastqc.sh
  """
}


// Writes the Nextflow version to version_nextflow.txt, runs the version helper
// scripts, then runs generate_versions.py and generate_references.py. Every
// *.yaml file left in the work directory is emitted for MultiQC.
// This process takes no inputs.
process versions {
  tag "${name}"
  publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
  module 'python/3.6.1-2-anaconda:cellranger/3.0.2:bcl2fastq/2.19.1:fastqc/0.11.5:pandoc/2.7'

  output:
  file("*.yaml") into yamlPaths

  script:
  """
  hostname
  ulimit -a
  echo ${workflow.nextflow.version} > version_nextflow.txt
  bash ${baseDir}/scripts/versions_mkfastq.sh
  bash ${baseDir}/scripts/versions_fastqc.sh
  python3 ${baseDir}/scripts/generate_versions.py -f version_*.txt -o versions
  python3 ${baseDir}/scripts/generate_references.py -r ${references} -o references
  """
}


// Runs MultiQC over the current work directory, where all Stats.json files
// (staged under bqc/<n>/), all FastQC archives (staged under fqc/) and the
// versions/references YAML files have been staged.
process multiqc {
  tag "${name}"
  queue 'super'
  publishDir "${outDir}/${task.process}/${name}", mode: 'copy', pattern: "{multiqc*}"
  module 'multiqc/1.7'

  input:
  file bqc name "bqc/?/*" from bqcPaths.collect()
  file fqc name "fqc/*" from fqcPaths.collect()
  file yamlPaths

  output:
  file("multiqc_report.html") into mqcPaths

  script:
  """
  hostname
  ulimit -a
  multiqc -c ${multiqcConf} .
  """
}