// Pipeline parameters; every path is built relative to ${baseDir}.
// Path to the deriva cookie file.
params.deriva = "${baseDir}/../test_data/deriva-cookies.txt"
// Location of the input bdbag.
// NOTE(review): this value is a directory, while the error message below refers to a "bdbag zip file" - confirm the intended value.
params.bdbag = "${baseDir}/../test_data/"
// Base output directory.
params.outDir = "${baseDir}/../output"
// Parse input variables
// bdbag channel: when the channel is empty the run exits with status 1 and a "not found" message naming params.bdbag.
// NOTE(review): no channel factory call (e.g. Channel.fromPath(params.bdbag)) is visible between `Channel` and `.ifEmpty` - confirm against the full file.
bdbag = Channel
.ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
// Plain copy of the output directory, used by the publishDir directives of the processes below.
outDir = params.outDir
* splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate rid
// splitData: unpack the study bdbag and re-create it with the fetch file split per replicate.
//   tag        - ${bdbag.baseName}
//   publishDir - ${outDir}/temp/${task.process}, published as symlinks
//   outputs    - Replicate_*.zip into bdbagSplit (mode flatten);
//                File.csv, "Experiment Settings.csv" and Experiment.csv from
//                ${bdbag.baseName}/data/ into fileMeta, experimentSettingsMeta and experimentMeta
//   steps      - link the deriva cookie file to ~/.bdbag/deriva-cookies.txt, take the study name
//                as the text of ${bdbag} before the first '.', unzip the bag, then run the helper
//                scripts under ${baseDir}/scripts/; per the LOG messages these filter the fetch
//                file to .fastq.gz entries, split it by replicate and re-create the bag
// NOTE(review): the input:/output:/script: labels, the script string delimiters, the definition of
// `deriva` and the helper script file names are not visible in this view - confirm against the full file.
process splitData {
tag "${bdbag.baseName}"
publishDir "${outDir}/temp/${task.process}", mode: "symlink"
file("Replicate_*.zip") into bdbagSplit mode flatten
file("${bdbag.baseName}/data/File.csv") into fileMeta
file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
echo LOG: deriva cookie linked
study=`echo "${bdbag}" | cut -d'.' -f1`
echo LOG: \${study}
unzip ${bdbag}
echo LOG: bdbag unzipped
python3 ${baseDir}/scripts/ --fetchFile \${study}
echo LOG: fetch file filtered for only .fastq.gz
python3 ${baseDir}/scripts/ --fetchFile \${study}
echo LOG: fetch file split by replicates
sh ${baseDir}/scripts/ \${study}
echo LOG: bag recreated with replicate split fetch file
println {${http_proxy}}
println {${https_proxy}}
* getData: fetch study files from consortium with the downloaded replicate bdbag
// getData: unzip one replicate bdbag and fetch its files.
//   tag        - ${rep.baseName}
//   publishDir - ${outDir}/temp/${task.process}, published as symlinks
//   output     - files matching **/*.R*.fastq.gz into fastq
//   steps      - print the shell limits (ulimit -a), log http_proxy and export https_proxy with the
//                same value, take the replicate name as the last path component of ${rep} up to
//                its first '.', unzip ${rep}, then run a helper script under ${baseDir}/scripts/
//                with the replicate name as its argument
// NOTE(review): the input:/output:/script: labels, the script string delimiters and the helper
// script file name are not visible in this view - confirm against the full file.
process getData {
tag "${rep.baseName}"
publishDir "${outDir}/temp/${task.process}", mode: "symlink"
file("**/*.R*.fastq.gz") into fastq
ulimit -a
echo LOG:\${http_proxy}
export https_proxy=\${http_proxy}
replicate=\$(echo "${rep}" | cut -d'.' -f1 | rev | cut -f1 -d '/' | rev)
echo LOG: \${replicate}
unzip ${rep}
echo LOG: replicate bdbag unzipped
sh ${baseDir}/scripts/ \${replicate}
echo LOG: replicate bdbag fetched