Skip to content
Snippets Groups Projects
Commit 41a2c611 authored by Gervaise Henry :cowboy:
Browse files

Merge develop into branch

parents 8c5b5a80 d499ea11
Branches
Tags
3 merge requests: !37 v0.0.1, !14 Resolve "process_createManifest", !13 Develop
Pipeline #5730 failed with stages
in 8 minutes and 16 seconds
...@@ -15,14 +15,7 @@ deriva = Channel ...@@ -15,14 +15,7 @@ deriva = Channel
bdbag = Channel bdbag = Channel
.fromPath(params.bdbag) .fromPath(params.bdbag)
.ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" } .ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" }
repRID = params.repRID
Channel.from(params.repRID)
.into {
repRID_getBag
repRID_getData
repRID_parseMetadata
repRID_trimData
}
outDir = params.outDir outDir = params.outDir
logsDir = "${outDir}/Logs" logsDir = "${outDir}/Logs"
...@@ -30,30 +23,36 @@ logsDir = "${outDir}/Logs" ...@@ -30,30 +23,36 @@ logsDir = "${outDir}/Logs"
// Define fixed files // Define fixed files
derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json") derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
// Define script files
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
/* /*
* getData: get bagit file from consortium * getData: get bagit file from consortium
*/ */
process getBag { process getBag {
tag "${repRID_getBag}" tag "${repRID}"
publishDir "${logsDir}/getBag", mode: 'symlink', pattern: "${repRID_getBag}.getBag.err" publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.getBag.err"
input: input:
val repRID_getBag
path credential, stageAs: 'credential.json' from deriva path credential, stageAs: 'credential.json' from deriva
path derivaConfig path derivaConfig
output: output:
path ("Replicate_*.zip") into bagit path ("Replicate_*.zip") into bagit
file ("${repRID_getBag}.getBag.err") file ("${repRID}.getBag.err")
script: script:
""" """
hostname >>${repRID_getBag}.getBag.err hostname >>${repRID}.getBag.err
ulimit -a >>${repRID_getBag}.getBag.err ulimit -a >>${repRID}.getBag.err
export https_proxy=\${http_proxy} export https_proxy=\${http_proxy}
ln -sf `readlink -e credential.json` ~/.deriva/credential.json 2>>${repRID_getBag}.getBag.err
echo "LOG: deriva credentials linked" >>${repRID_getBag}.getBag.err # link credential file for authentication
deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID_getBag} 2>>${repRID_getBag}.getBag.err ln -sf `readlink -e credential.json` ~/.deriva/credential.json 2>>${repRID}.getBag.err
echo "LOG: deriva credentials linked" >>${repRID}.getBag.err
# deriva-download replicate RID
deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID} 2>>${repRID}.getBag.err
""" """
} }
...@@ -61,11 +60,11 @@ process getBag { ...@@ -61,11 +60,11 @@ process getBag {
* getData: fetch study files from consortium with downloaded bdbag.zip * getData: fetch study files from consortium with downloaded bdbag.zip
*/ */
process getData { process getData {
tag "${repRID_getData}" tag "${repRID}"
publishDir "${logsDir}/getData", mode: 'symlink', pattern: "${repRID_getData}.getData.err" publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.getData.err"
input: input:
val repRID_getData path script_bdbagFetch
path cookies, stageAs: 'deriva-cookies.txt' from bdbag path cookies, stageAs: 'deriva-cookies.txt' from bdbag
path bagit path bagit
...@@ -74,21 +73,29 @@ process getData { ...@@ -74,21 +73,29 @@ process getData {
file("**/File.csv") into fileMeta file("**/File.csv") into fileMeta
file("**/Experiment Settings.csv") into experimentSettingsMeta file("**/Experiment Settings.csv") into experimentSettingsMeta
file("**/Experiment.csv") into experimentMeta file("**/Experiment.csv") into experimentMeta
file ("${repRID_getData}.getData.err") file ("${repRID}.getData.err")
script: script:
""" """
hostname >>${repRID_getData}.getData.err hostname >>${repRID}.getData.err
ulimit -a >>${repRID_getData}.getData.err ulimit -a >>${repRID}.getData.err
export https_proxy=\${http_proxy} export https_proxy=\${http_proxy}
ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt >>${repRID_getData}.getData.err
echo "LOG: deriva cookie linked" >>${repRID_getData}.getData.err # link deriva cookie for authentication
ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt >>${repRID}.getData.err
echo "LOG: deriva cookie linked" >>${repRID}.getData.err
# get bagit basename
replicate=\$(basename "${bagit}" | cut -d '.' -f1) replicate=\$(basename "${bagit}" | cut -d '.' -f1)
echo "LOG: \${replicate}" >>${repRID_getData}.getData.err echo "LOG: \${replicate}" >>${repRID}.getData.err
unzip ${bagit} 2>>${repRID_getData}.getData.err
echo "LOG: replicate bdbag unzipped" >>${repRID_getData}.getData.err # unzip bagit
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} ${repRID_getData} 2>>${repRID_getData}.getData.err unzip ${bagit} 2>>${repRID}.getData.err
echo "LOG: replicate bdbag fetched" >>${repRID_getData}.getData.err echo "LOG: replicate bdbag unzipped" >>${repRID}.getData.err
# bagit fetch fastq's only and rename by repRID
sh bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err
echo "LOG: replicate bdbag fetched" >>${repRID}.getData.err
""" """
} }
...@@ -96,11 +103,11 @@ process getData { ...@@ -96,11 +103,11 @@ process getData {
* parseMetadata: parses metadata to extract experiment parameters * parseMetadata: parses metadata to extract experiment parameters
*/ */
process parseMetadata { process parseMetadata {
tag "${repRID_parseMetadata}" tag "${repRID}"
publishDir "${logsDir}", mode: 'copy', pattern: "${repRID_parseMetadata}.parseMetadata.err" publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.parseMetadata.err"
input: input:
val repRID_parseMetadata val repRID
path fileMeta path fileMeta
path experimentSettingsMeta path experimentSettingsMeta
path experimentMeta path experimentMeta
...@@ -113,28 +120,28 @@ process parseMetadata { ...@@ -113,28 +120,28 @@ process parseMetadata {
script: script:
""" """
hostname >>${repRID_parseMetadata}.parseMetadata.err hostname >>${repRID}.parseMetadata.err
ulimit -a >>${repRID_parseMetadata}.parseMetadata.err ulimit -a >>${repRID}.parseMetadata.err
# Check replicate RID metadata # Check replicate RID metadata
rep=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${fileMeta}" -p repRID) rep=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${fileMeta}" -p repRID)
echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID_parseMetadata}.parseMetadata.err echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID}.parseMetadata.err
# Get endedness metadata # Get endedness metadata
ends=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentSettingsMeta}" -p ends) ends=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentSettingsMeta}" -p ends)
echo "LOG: endedness metadata parsed: \${ends}" >>${repRID_parseMetadata}.parseMetadata.err echo "LOG: endedness metadata parsed: \${ends}" >>${repRID}.parseMetadata.err
# Get strandedness metadata # Get strandedness metadata
stranded=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentSettingsMeta}" -p stranded) stranded=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentSettingsMeta}" -p stranded)
echo "LOG: strandedness metadata parsed: \${stranded}" >>${repRID_parseMetadata}.parseMetadata.err echo "LOG: strandedness metadata parsed: \${stranded}" >>${repRID}.parseMetadata.err
# Get spike-in metadata # Get spike-in metadata
spike=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentSettingsMeta}" -p spike) spike=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentSettingsMeta}" -p spike)
echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID_parseMetadata}.parseMetadata.err echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID}.parseMetadata.err
# Get species metadata # Get species metadata
specie=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentMeta}" -p specie) specie=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentMeta}" -p specie)
echo "LOG: species metadata parsed: \${specie}" >>${repRID_parseMetadata}.parseMetadata.err echo "LOG: species metadata parsed: \${specie}" >>${repRID}.parseMetadata.err
""" """
} }
...@@ -146,32 +153,32 @@ ends.set { ...@@ -146,32 +153,32 @@ ends.set {
* trimData: trims any adapter or non-host sequences from the data * trimData: trims any adapter or non-host sequences from the data
*/ */
process trimData { process trimData {
tag "${repRID_trimData}" tag "${repRID}"
publishDir "${logsDir}/trimData", mode: 'symlink', pattern: "\${repRID_trimData}.trimData.*" publishDir "${logsDir}", mode: 'copy', pattern: "\${repRID}.trimData.*"
input: input:
val repRID_trimData
file(fastq) from fastqs file(fastq) from fastqs
val ends_trimData val ends_trimData
output: output:
path ("*.fq.gz") into fastqs_trimmed path ("*.fq.gz") into fastqs_trimmed
file ("${repRID_trimData}.trimData.log") val ends
file ("${repRID_trimData}.trimData.err") file ("${repRID}.trimData.log")
file ("${repRID}.trimData.err")
script: script:
""" """
if [ `nproc` -gt 8 ] hostname >>${repRID}.trimData.err
then ulimit -a >>${repRID}.trimData.err
ncore=8
else # trim fastqs
ncore=`nproc`
fi
if [ '${ends_trimData}' == 'se' ] if [ '${ends_trimData}' == 'se' ]
then then
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID_trimData} -j \${ncore} ${fastq[0]} 1>>${repRID_trimData}.trimData.log 2>>${repRID_trimData}.trimData.err; ends='se'
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err;
else else
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID_trimData} -j \${ncore} ${fastq[0]} ${fastq[1]} 1>>${repRID_trimData}.trimData.log 2>>${repRID_trimData}.trimData.err; ends='pe'
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err;
fi fi
""" """
} }
\ No newline at end of file
...@@ -9,7 +9,10 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -9,7 +9,10 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../' '/../../'
@pytest.mark.trimData @pytest.mark.trimData
def test_trimData(): def test_trimData_se():
assert os.path.exists(os.path.join(test_output_path, '16-1ZX4_trimmed.fq.gz')) assert os.path.exists(os.path.join(test_output_path, '16-1ZX4_trimmed.fq.gz'))
@pytest.mark.trimData
def test_trimData_pe():
assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_R1_val_1.fq.gz')) assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_R1_val_1.fq.gz'))
assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_R2_val_2.fq.gz')) assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_R2_val_2.fq.gz'))
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment