Newer
Older
// Default pipeline inputs. These are ordinary Nextflow params, so each one can
// be overridden on the command line (--deriva, --bdbag, --outDir).
params.deriva = "${baseDir}/../test_data/deriva-cookies.txt"
params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
params.outDir = "${baseDir}/../output"

// Parse input variables
// checkIfExists: true makes a missing input fail when the channel is created.
// For a literal (non-glob) path, fromPath emits the path without checking the
// filesystem, so the ifEmpty guard on its own would not catch a missing file.
// The ifEmpty guard is kept as a fallback with its original message.
deriva = Channel
  .fromPath(params.deriva, checkIfExists: true)
  .ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" }
bdbag = Channel
  .fromPath(params.bdbag, checkIfExists: true)
  .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }

// Base directory under which the processes publish their results.
outDir = params.outDir
/*
* splitData: split bdbag files by replicate so fetch can occur in parallel
process splitData {
tag "${bdbag.baseName}"
publishDir "${outDir}/temp/${task.process}", mode: "symlink"
input:
file("Replicate_*.zip") into bdbagSplit mode flatten
file("${bdbag.baseName}/data/File.csv") into fileMeta
file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
study=\$(echo "${bdbag}" | cut -d'.' -f1)
echo LOG: \${study}
echo LOG: bdgag unzipped
python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
echo LOG: fetch file filtered for only .fastq.gz
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
echo LOG: fetch file split by replicates
sh ${baseDir}/scripts/splitBag.sh \${study}
echo LOG: bag recreated with replicate split fetch file
"""
}
/*
* getData: fetch study files from consortium with downloaded bdbag.zip
*/
process getData {
tag "${rep.baseName}"
publishDir "${outDir}/temp/${task.process}", mode: "symlink"
input:
file deriva
each rep from bdbagSplit
output:
file("**/*.R*.fastq.gz") into fastq
script:
"""
hostname
ulimit -a
replicate=\$(echo "${rep}" | cut -d'.' -f1)
echo LOG: \${replicate}
cp "${deriva}" ~/.bdbag/deriva-cookies.txt
echo LOG: deriva cookie loaded
unzip ${rep}
echo LOG: replicate bdbag unzipped
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
echo LOG: replicate bdbag fetched
sh ${baseDir}/scripts/renameFastq.sh \${replicate}
echo LOG: fastq.gz files renamed to replicate RID