#!/usr/bin/env nextflow

/* main.nf
 *
 * --------------------------------------------------------------------------
 * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/LICENSE)
 * --------------------------------------------------------------------------
 *
 * Pipeline overview (as wired by the channels below):
 *   checkDesignFile -> untarBCL -> mkfastq -> [countDesign] -> fastqc
 *   -> versions -> multiqc
 */

// ########  ####  ######  ########
// ##     ##  ##  ##    ## ##
// ##     ##  ##  ##       ##
// ########   ##  ##       ######
// ##     ##  ##  ##       ##
// ##     ##  ##  ##    ## ##
// ########  ####  ######  ##

// Define input variables
params.name = "run"
params.bcl = "${baseDir}/../test_data/simple1/*.tar.gz"
// NOTE(review): this default points at test_data/single1 while params.bcl
// points at test_data/simple1 -- confirm that both directories exist.
params.designFile = "${baseDir}/../test_data/single1/cellranger-tiny-bcl-simple-1_2_0.csv"
params.mask = ""
params.astrocyte = false
params.outDir = "${baseDir}/output"

// Define list of files
// Fail early when the glob matches nothing, mirroring the design file check.
tarList = Channel
  .fromPath( params.bcl )
  .ifEmpty { exit 1, "bcl tarball(s) not found: ${params.bcl}" }
// Number of tarballs matched; decides below whether countDesign is defined.
bclCount = Channel
  .fromPath( params.bcl )
  .count()

// Define regular variables
pipelineVersion = "2.1.5"
name = params.name
designLocation = Channel
  .fromPath(params.designFile)
  .ifEmpty { exit 1, "design file not found: ${params.designFile}" }
mask = params.mask
outDir = params.outDir

// Define script files
check_designScript = Channel.fromPath("$baseDir/scripts/check_design.py")
untarBCLScript = Channel.fromPath("$baseDir/scripts/untarBCL.sh")
countDesignScript = Channel.fromPath("$baseDir/scripts/countDesign.sh")
fastqcScript = Channel.fromPath("$baseDir/scripts/fastqc.sh")
versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py")
referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py")
versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh")
//versions_pigzScript = Channel.fromPath("$baseDir/scripts/versions_pigz.sh")
versions_cellrangerScript = Channel.fromPath("$baseDir/scripts/versions_cellranger.sh")
versions_bcl2fastqScript = Channel.fromPath("$baseDir/scripts/versions_bcl2fastq.sh")
versions_fastqcScript = Channel.fromPath("$baseDir/scripts/versions_fastqc.sh")

// Define report files
multiqcConf = Channel.fromPath("${baseDir}/configs/multiqc_config.yaml")
references = Channel.fromPath("${baseDir}/../docs/references.md")

/*
 * trackStart: track start of pipeline
 */
params.ci = false
params.dev = false
/*
process trackStart {
  script:
  """
  hostname
  ulimit -a
  export https_proxy=\${http_proxy}
  curl -H 'Content-Type: application/json' -X PUT -d '{ \
    "sessionId": "${workflow.sessionId}", \
    "pipeline": "cellranger_mkfastq", \
    "pipelineVersion": "${pipelineVersion}", \
    "start": "${workflow.start}", \
    "astrocyte": ${params.astrocyte}, \
    "status": "started", \
    "nextflowVersion": "${workflow.nextflow.version}",
    "ci": ${params.ci},
    "dev": ${params.dev}}' \
    "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
  """
}
*/

/*
 * checkDesignFile: strip spaces from the design file name, run
 * check_design.py on it, and record the pipeline, Nextflow and Python
 * versions. Emits design.checked.csv twice (for mkfastq and countDesign).
 */
process checkDesignFile {
  tag "${name}"

  input:
  file check_designScript
  file versions_pythonScript
  file designLocation

  output:
  file("design.checked.csv") into designPaths
  file("design.checked.csv") into designCount
  file("version_pipeline.txt") into version_pipeline
  file("version_nextflow.txt") into version_nextflow
  file("version_python.txt") into version_python

  script:
  """
  hostname
  ulimit -a
  noSpaceDesign=\$(echo "${designLocation}" | tr -d ' ')
  if [[ "\${noSpaceDesign}" != "${designLocation}" ]]; then
    mv "${designLocation}" "\${noSpaceDesign}"
  fi
  python3 check_design.py -d "\${noSpaceDesign}"
  bash versions_python.sh > version_python.txt
  echo "${workflow.nextflow.version}" > version_nextflow.txt
  echo "${pipelineVersion}" > version_pipeline.txt
  """
}

/* nextflow workflow manifest version calls that aren't compatible with Astrocyte
  echo "${workflow.manifest.version}" > version_pipeline.txt
*/

/*
 * untarBCL: run untarBCL.sh once per tarball matched by params.bcl and
 * emit each resulting path individually (mode flatten).
 */
process untarBCL {
  tag "${tar.simpleName}"

  input:
  file untarBCLScript
  //file versions_pigzScript
  each file(tar) from tarList

  output:
  // NOTE(review): "[!version_pigz.txt]" is a glob character class, not a
  // file-name exclusion: it appears to match any name whose LAST character
  // is not one of those letters. Confirm untarBCL.sh never produces a
  // run folder whose name ends in one of them.
  file("*[!version_pigz.txt]") into bclPaths mode flatten
  //file("version_pigz.txt") into version_pigz

  script:
  """
  hostname
  ulimit -u 16384
  ulimit -a
  bash untarBCL.sh -t ${tar}
  #bash versions_pigz.sh > version_pigz.txt
  """
}

/*
 * mkfastq: run `cellranger mkfastq` for each untarred BCL folder with the
 * checked design file, gather every produced fastq.gz under fq/<run>/, and
 * record the cellranger and bcl2fastq versions.
 */
process mkfastq {
  tag "${bcl.simpleName}"
  publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
  queue '128GB,256GB,256GBv1,384GB'

  input:
  file versions_cellrangerScript
  file versions_bcl2fastqScript
  each file(bcl) from bclPaths.collect()
  file design from designPaths

  output:
  file("fq/${bcl.simpleName}/*.fastq.gz") into fastqPaths
  val "${bcl.simpleName}" into bclName
  file("**/outs/**/*.fastq.gz") into cellrangerCount mode flatten
  file("**/outs/fastq_path/Stats/*") into bqcPaths
  file("version_cellranger.txt") into version_cellranger
  file("version_bcl2fastq.txt") into version_bcl2fastq

  // ${mask} is left unquoted on purpose: it is empty by default and is
  // passed through verbatim as extra command-line text when set.
  // The find prunes ./fq so files already copied there are not picked up
  // again and copied onto themselves.
  script:
  """
  hostname
  ulimit -u 16384
  ulimit -a
  cellranger mkfastq --id=mkfastq_${bcl.simpleName} --run=${bcl} --csv=${design} --ignore-dual-index ${mask}
  mkdir -p "fq/${bcl.simpleName}"
  find . -path ./fq -prune -o -name "*.fastq.gz" -exec cp {} "fq/${bcl.simpleName}/" \\;
  bash versions_cellranger.sh > version_cellranger.txt
  bash versions_bcl2fastq.sh > version_bcl2fastq.txt
  """
}

// countDesign is only defined when exactly one tarball matched params.bcl.
if (bclCount.value == 1) {
  /*
   * countDesign: run countDesign.sh with all fastq files and the checked
   * design staged in the work directory; publishes
   * Cellranger_Count_Design.csv.
   */
  process countDesign {
    tag "${name}"
    publishDir "${outDir}/${task.process}/${name}", mode: 'copy'

    input:
    file countDesignScript
    file fastqs from cellrangerCount.collect()
    file design from designCount

    output:
    file("Cellranger_Count_Design.csv") into CountDesign

    script:
    """
    hostname
    ulimit -a
    bash countDesign.sh
    """
  }
}

/*
 * fastqc: prefix the staged fastq files with the run name, run fastqc.sh,
 * and record the FastQC version.
 */
process fastqc {
  tag "${bcl}"

  input:
  file fastqcScript
  file versions_fastqcScript
  file(fastq) from fastqPaths
  // NOTE(review): `each` repeats the task for every run name rather than
  // pairing a name with its own fastq set; with more than one tarball the
  // prefix may not match the fastq files -- confirm intended behaviour.
  each bcl from bclName

  output:
  file("*fastqc.zip") into fqcPaths mode flatten
  file("version_fastqc.txt") into version_fastqc

  script:
  """
  hostname
  ulimit -u 16384
  ulimit -a
  find *.fastq.gz -exec mv {} ${bcl}.{} \\;
  bash fastqc.sh
  bash versions_fastqc.sh > version_fastqc.txt
  """
}

/*
 * versions: turn the collected version_*.txt files and the references
 * markdown into YAML files consumed by multiqc.
 */
process versions {
  tag "${name}"

  input:
  file versionsScript
  file referencesScript
  file version_pipeline
  file version_nextflow
  file version_python
  //file version_pigz
  file version_cellranger
  file version_bcl2fastq
  file version_fastqc
  file references

  output:
  file("*.yaml") into yamlPaths

  script:
  """
  hostname
  ulimit -a
  python3 generate_versions.py -f version_*.txt -o versions
  python3 generate_references.py -r ${references} -o references
  """
}

/*
 * multiqc: run MultiQC over the staged bcl2fastq stats (bqc/), FastQC
 * archives (fqc/) and version/reference YAML files; publishes multiqc*.
 */
process multiqc {
  tag "${name}"
  publishDir "${outDir}/${task.process}/${name}", mode: 'copy', pattern: "{multiqc*}"

  input:
  file multiqcConf
  file bqc name "bqc/?/*" from bqcPaths.collect()
  file fqc name "fqc/?/*" from fqcPaths.collect()
  file yamlPaths

  output:
  file("multiqc_report.html") into mqcPaths

  script:
  """
  hostname
  ulimit -a
  #export LC_ALL=C.UTF-8
  #export LANG=C.UTF-8
  multiqc -c ${multiqcConf} .
  """
}