Skip to content
Snippets Groups Projects
Commit b4be1926 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '3-trimming' into 'develop'

Added trimming step using trimGalore and logging.

Closes #3

See merge request !3
parents f6b9f1f2 b00a62fe
Branches
Tags
4 merge requests: !37 v0.0.1, !6 Develop, !4 Develop, !3 Added trimming step using trimGalore and logging.
...@@ -5,10 +5,14 @@ process { ...@@ -5,10 +5,14 @@ process {
// Process specific configuration // Process specific configuration
withName:splitData { withName:splitData {
container = 'docker://bicf/bdbag:1.0' container = 'docker://bicf/gudmaprbkfilexfer:1.0'
} }
withName:getData { withName:getData {
container = 'docker://bicf/bdbag:1.0' container = 'docker://bicf/gudmaprbkfilexfer:1.0'
}
withName:trimData {
container = 'docker://bicf/trimgalore:1.0'
queue = '256GB,256GBv1,384GB'
} }
} }
......
...@@ -13,12 +13,15 @@ bdbag = Channel ...@@ -13,12 +13,15 @@ bdbag = Channel
.ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" } .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
outDir = params.outDir outDir = params.outDir
logsDir = "${outDir}/Logs"
/* /*
* splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate rid * splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate rid
*/ */
process splitData { process splitData {
tag "${bdbag.baseName}" tag "${bdbag.baseName}"
executor 'local'
publishDir "${logsDir}/splitData", mode: 'symlink', pattern: "${bdbag.baseName}.splitData.err"
input: input:
file bdbag file bdbag
...@@ -29,23 +32,24 @@ process splitData { ...@@ -29,23 +32,24 @@ process splitData {
file("${bdbag.baseName}/data/File.csv") into fileMeta file("${bdbag.baseName}/data/File.csv") into fileMeta
file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
file ("${bdbag.baseName}.splitData.err")
script: script:
""" """
hostname hostname >> ${bdbag.baseName}.splitData.err
ulimit -a ulimit -a >> ${bdbag.baseName}.splitData.err
ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt 2>>${bdbag.baseName}.splitData.err
echo "LOG: deriva cookie linked" echo "LOG: deriva cookie linked" >> ${bdbag.baseName}.splitData.err
study=`echo "${bdbag}" | cut -d '.' -f1` study=`echo "${bdbag}" | cut -d '.' -f1` 2>>${bdbag.baseName}.splitData.err
echo "LOG: \${study}" echo "LOG: \${study}" >> ${bdbag.baseName}.splitData.err
unzip ${bdbag} unzip ${bdbag} 2>>${bdbag.baseName}.splitData.err
echo "LOG: bdgag unzipped" echo "LOG: bdgag unzipped" >> ${bdbag.baseName}.splitData.err
python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study} python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
echo "LOG: fetch file filtered for only .fastq.gz" echo "LOG: fetch file filtered for only .fastq.gz" >> ${bdbag.baseName}.splitData.err
python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study} python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
echo "LOG: fetch file split by replicates" echo "LOG: fetch file split by replicates" >> ${bdbag.baseName}.splitData.err
sh ${baseDir}/scripts/splitBag.sh \${study} sh ${baseDir}/scripts/splitBag.sh \${study} 2>>${bdbag.baseName}.splitData.err
echo "LOG: bag recreated with replicate split fetch file" echo "LOG: bag recreated with replicate split fetch file" >> ${bdbag.baseName}.splitData.err
""" """
} }
...@@ -54,24 +58,45 @@ process splitData { ...@@ -54,24 +58,45 @@ process splitData {
*/ */
process getData { process getData {
tag "${rep.baseName}" tag "${rep.baseName}"
publishDir "${outDir}/tempOut/fastqs", mode: "symlink" publishDir "${logsDir}/getData", mode: 'symlink', pattern: "${rep.baseName}.getData.err"
input: input:
each rep from bdbagSplit each rep from bdbagSplit
output: output:
path ("*.R*.fastq.gz", type: 'file', maxDepth: '0') into fastq set val ("${rep.baseName}"), file ("*.R{1,2}.fastq.gz") into trimming
script: script:
""" """
hostname hostname >>${rep.baseName}.getData.err
ulimit -a ulimit -a >>${rep.baseName}.getData.err
export https_proxy=\${http_proxy} export https_proxy=\${http_proxy}
replicate=\$(basename "${rep}" | cut -d '.' -f1) replicate=\$(basename "${rep}" | cut -d '.' -f1)
echo "LOG: \${replicate}" echo "LOG: \${replicate}" >>${rep.baseName}.getData.err
unzip ${rep} unzip ${rep} 2>>${rep.baseName}.getData.err
echo "LOG: replicate bdbag unzipped" echo "LOG: replicate bdbag unzipped" >>${rep.baseName}.getData.err
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} 2>>${rep.baseName}.getData.err
echo "LOG: replicate bdbag fetched" echo "LOG: replicate bdbag fetched" >>${rep.baseName}.getData.err
""" """
} }
/*
 * trimData: runs Trim Galore in paired-end mode on the two fastq files of each replicate
 *
 * Input  : (repID, reads) tuples from the `trimming` channel; reads[0] and reads[1] are
 *          passed to trim_galore as the two mates of the pair.
 * Output : the validated, trimmed mates (*_val_1.fq.gz / *_val_2.fq.gz), published under
 *          ${outDir}/tempOut/trimmed, plus the trim_galore stdout/stderr captures
 *          (<rep>.trimData.log / <rep>.trimData.err), published under ${logsDir}/trimData.
 */
process trimData {
  tag "trim-${repID}"
  publishDir "${outDir}/tempOut/trimmed", mode: "symlink", pattern: "*_val_{1,2}.fq.gz"
  // `rep` is a bash variable that only exists inside the script block, so it cannot be
  // interpolated into this directive; select the log files with a glob instead.
  publishDir "${logsDir}/trimData", mode: 'symlink', pattern: "*.trimData.{log,err}"

  input:
    set repID, reads from trimming

  output:
    path ("*_val_{1,2}.fq.gz", type: 'file', maxDepth: '0')
    // publishDir only considers declared outputs, so the log files must be listed here
    // for them to reach ${logsDir}/trimData.
    file ("*.trimData.{log,err}")

  script:
    """
    # Drop everything up to and including the first '_' of the replicate id; the remainder
    # is used as the trim_galore output basename and as the log file prefix.
    rep=`echo ${repID} | cut -f2- -d '_'`;
    # stdout is appended to the .log file and stderr to the .err file.
    trim_galore --gzip --max_n 1 --paired --basename \${rep} -j `nproc` ${reads[0]} ${reads[1]} 1>>\${rep}.trimData.log 2>>\${rep}.trimData.err;
    """
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment