#!/usr/bin/env nextflow

/*
 * Pipeline overview (DSL1):
 *   1. splitData - unzips the study bdbag and re-bags it as one zip per
 *                  replicate so the downloads can run in parallel.
 *   2. getData   - for every replicate bag, loads the deriva cookie, fetches
 *                  the bag contents and renames the resulting fastq.gz files.
 *
 * The helper scripts under ${baseDir}/scripts are not part of this file; the
 * notes on what they do are taken from the LOG messages echoed after each
 * call and should be confirmed against the scripts themselves.
 */

// Define input variables
params.deriva = "${baseDir}/../test_data/deriva-cookies.txt"
params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
params.outDir = "${baseDir}/../output"

// Parse input variables
// checkIfExists makes a missing literal path fail fast: without it fromPath
// emits the path even when no such file exists, so ifEmpty alone never fires.
// ifEmpty is kept to cover glob patterns that match nothing.
deriva = Channel
  .fromPath(params.deriva, checkIfExists: true)
  .ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" }
bdbag = Channel
  .fromPath(params.bdbag, checkIfExists: true)
  .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }

outDir = params.outDir

/*
 * splitData: split bdbag files by replicate so fetch can occur in parallel
 *
 * input:  bdbag - the study bdbag zip
 * output: bdbagSplit             - one Replicate_*.zip per emission (flattened)
 *         fileMeta               - <study>/data/File.csv
 *         experimentSettingsMeta - <study>/data/Experiment Settings.csv
 *         experimentMeta         - <study>/data/Experiment.csv
 */
process splitData {
  tag "${bdbag.baseName}"
  publishDir "${outDir}/temp/${task.process}", mode: "symlink"

  input:
    file bdbag

  output:
    file("Replicate_*.zip") into bdbagSplit mode flatten
    file("${bdbag.baseName}/data/File.csv") into fileMeta
    file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
    file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta

  script:
    // `study` is taken from bdbag.baseName so it always agrees with the
    // directory name used in the output declarations above, even when the
    // zip file name contains additional dots.
    """
    hostname
    ulimit -a
    study="${bdbag.baseName}"
    echo LOG: \${study}
    unzip "${bdbag}"
    echo LOG: bdgag unzipped
    python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
    echo LOG: fetch file filtered for only .fastq.gz
    python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
    echo LOG: fetch file split by replicates
    sh ${baseDir}/scripts/splitBag.sh \${study}
    echo LOG: bag recreated with replicate split fetch file
    """
}

/*
 * getData: fetch study files from consortium with downloaded bdbag.zip
 *
 * input:  deriva - deriva cookie file, copied to ~/.bdbag/deriva-cookies.txt
 *         rep    - one replicate bag from bdbagSplit (process runs once per bag)
 * output: fastq  - every file matching **\/*.R*.fastq.gz in the work directory
 */
process getData {
  tag "${rep.baseName}"
  publishDir "${outDir}/temp/${task.process}", mode: "symlink"

  input:
    file deriva
    each rep from bdbagSplit

  output:
    file("**/*.R*.fastq.gz") into fastq

  script:
    // `replicate` is taken from rep.baseName to match the tag above.
    // mkdir -p guards against a missing ~/.bdbag (fresh node or container),
    // which would otherwise make the cp fail.
    // NOTE(review): parallel tasks all write the same cookie file into the
    // shared home directory; harmless while the content is identical.
    """
    hostname
    ulimit -a
    replicate="${rep.baseName}"
    echo LOG: \${replicate}
    mkdir -p ~/.bdbag
    cp "${deriva}" ~/.bdbag/deriva-cookies.txt
    echo LOG: deriva cookie loaded
    unzip "${rep}"
    echo LOG: replicate bdbag unzipped
    sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
    echo LOG: replicate bdbag fetched
    sh ${baseDir}/scripts/renameFastq.sh \${replicate}
    echo LOG: fastq.gz files renamed to replicate RID
    """
}