Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
RNA-seq
Manage
Activity
Members
Labels
Plan
Issues
12
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
GUDMAP_RBK
RNA-seq
Commits
41a2c611
Commit
41a2c611
authored
5 years ago
by
Gervaise Henry
Browse files
Options
Downloads
Plain Diff
Merge develop into branch
parents
8c5b5a80
d499ea11
Branches
Branches containing commit
Tags
Tags containing commit
3 merge requests
!37
v0.0.1
,
!14
Resolve "process_createManifest"
,
!13
Develop
Pipeline
#5730
failed with stages
in 8 minutes and 16 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
workflow/rna-seq.nf
+65
-58
65 additions, 58 deletions
workflow/rna-seq.nf
workflow/tests/test_trimData.py
+4
-1
4 additions, 1 deletion
workflow/tests/test_trimData.py
with
69 additions
and
59 deletions
workflow/rna-seq.nf
+
65
−
58
View file @
41a2c611
...
...
@@ -15,14 +15,7 @@ deriva = Channel
bdbag = Channel
.fromPath(params.bdbag)
.ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" }
Channel.from(params.repRID)
.into {
repRID_getBag
repRID_getData
repRID_parseMetadata
repRID_trimData
}
repRID = params.repRID
outDir = params.outDir
logsDir = "${outDir}/Logs"
...
...
@@ -30,30 +23,36 @@ logsDir = "${outDir}/Logs"
// Define fixed files
derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
// Define script files
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
/*
* getData: get bagit file from consortium
*/
process getBag {
tag "${repRID
_getBag
}"
publishDir "${logsDir}
/getBag
", mode: '
symlink
', pattern: "${repRID
_getBag
}.getBag.err"
tag "${repRID}"
publishDir "${logsDir}", mode: '
copy
', pattern: "${repRID}.getBag.err"
input:
val repRID_getBag
path credential, stageAs: 'credential.json' from deriva
path derivaConfig
output:
path ("Replicate_*.zip") into bagit
file ("${repRID
_getBag
}.getBag.err")
file ("${repRID}.getBag.err")
script:
"""
hostname >>${repRID
_getBag
}.getBag.err
ulimit -a >>${repRID
_getBag
}.getBag.err
hostname >>${repRID}.getBag.err
ulimit -a >>${repRID}.getBag.err
export https_proxy=\${http_proxy}
ln -sf `readlink -e credential.json` ~/.deriva/credential.json 2>>${repRID_getBag}.getBag.err
echo "LOG: deriva credentials linked" >>${repRID_getBag}.getBag.err
deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID_getBag} 2>>${repRID_getBag}.getBag.err
# link credential file for authentication
ln -sf `readlink -e credential.json` ~/.deriva/credential.json 2>>${repRID}.getBag.err
echo "LOG: deriva credentials linked" >>${repRID}.getBag.err
# deriva-download replicate RID
deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID} 2>>${repRID}.getBag.err
"""
}
...
...
@@ -61,11 +60,11 @@ process getBag {
* getData: fetch study files from consortium with downloaded bdbag.zip
*/
process getData {
tag "${repRID
_getData
}"
publishDir "${logsDir}
/getData
", mode: '
symlink
', pattern: "${repRID
_getData
}.getData.err"
tag "${repRID}"
publishDir "${logsDir}", mode: '
copy
', pattern: "${repRID}.getData.err"
input:
val repRID_getData
path script_bdbagFetch
path cookies, stageAs: 'deriva-cookies.txt' from bdbag
path bagit
...
...
@@ -74,21 +73,29 @@ process getData {
file("**/File.csv") into fileMeta
file("**/Experiment Settings.csv") into experimentSettingsMeta
file("**/Experiment.csv") into experimentMeta
file ("${repRID
_getData
}.getData.err")
file ("${repRID}.getData.err")
script:
"""
hostname >>${repRID
_getData
}.getData.err
ulimit -a >>${repRID
_getData
}.getData.err
hostname >>${repRID}.getData.err
ulimit -a >>${repRID}.getData.err
export https_proxy=\${http_proxy}
ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt >>${repRID_getData}.getData.err
echo "LOG: deriva cookie linked" >>${repRID_getData}.getData.err
# link deriva cookie for authentication
ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt >>${repRID}.getData.err
echo "LOG: deriva cookie linked" >>${repRID}.getData.err
# get bagit basename
replicate=\$(basename "${bagit}" | cut -d '.' -f1)
echo "LOG: \${replicate}" >>${repRID_getData}.getData.err
unzip ${bagit} 2>>${repRID_getData}.getData.err
echo "LOG: replicate bdbag unzipped" >>${repRID_getData}.getData.err
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} ${repRID_getData} 2>>${repRID_getData}.getData.err
echo "LOG: replicate bdbag fetched" >>${repRID_getData}.getData.err
echo "LOG: \${replicate}" >>${repRID}.getData.err
# unzip bagit
unzip ${bagit} 2>>${repRID}.getData.err
echo "LOG: replicate bdbag unzipped" >>${repRID}.getData.err
# bagit fetch fastq's only and rename by repRID
sh bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err
echo "LOG: replicate bdbag fetched" >>${repRID}.getData.err
"""
}
...
...
@@ -96,11 +103,11 @@ process getData {
* parseMetadata: parses metadata to extract experiment parameters
*/
process parseMetadata {
tag "${repRID
_parseMetadata
}"
publishDir "${logsDir}", mode: 'copy', pattern: "${repRID
_parseMetadata
}.parseMetadata.err"
tag "${repRID}"
publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.parseMetadata.err"
input:
val repRID
_parseMetadata
val repRID
path fileMeta
path experimentSettingsMeta
path experimentMeta
...
...
@@ -113,28 +120,28 @@ process parseMetadata {
script:
"""
hostname >>${repRID
_parseMetadata
}.parseMetadata.err
ulimit -a >>${repRID
_parseMetadata
}.parseMetadata.err
hostname >>${repRID}.parseMetadata.err
ulimit -a >>${repRID}.parseMetadata.err
# Check replicate RID metadata
rep=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID
_parseMetadata
} -m "${fileMeta}" -p repRID)
echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID
_parseMetadata
}.parseMetadata.err
rep=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${fileMeta}" -p repRID)
echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID}.parseMetadata.err
# Get endedness metadata
ends=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID
_parseMetadata
} -m "${experimentSettingsMeta}" -p ends)
echo "LOG: endedness metadata parsed: \${ends}" >>${repRID
_parseMetadata
}.parseMetadata.err
ends=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentSettingsMeta}" -p ends)
echo "LOG: endedness metadata parsed: \${ends}" >>${repRID}.parseMetadata.err
# Get strandedness metadata
stranded=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID
_parseMetadata
} -m "${experimentSettingsMeta}" -p stranded)
echo "LOG: strandedness metadata parsed: \${stranded}" >>${repRID
_parseMetadata
}.parseMetadata.err
stranded=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentSettingsMeta}" -p stranded)
echo "LOG: strandedness metadata parsed: \${stranded}" >>${repRID}.parseMetadata.err
# Get spike-in metadata
spike=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID
_parseMetadata
} -m "${experimentSettingsMeta}" -p spike)
echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID
_parseMetadata
}.parseMetadata.err
spike=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentSettingsMeta}" -p spike)
echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID}.parseMetadata.err
# Get species metadata
specie=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID
_parseMetadata
} -m "${experimentMeta}" -p specie)
echo "LOG: species metadata parsed: \${specie}" >>${repRID
_parseMetadata
}.parseMetadata.err
specie=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID} -m "${experimentMeta}" -p specie)
echo "LOG: species metadata parsed: \${specie}" >>${repRID}.parseMetadata.err
"""
}
...
...
@@ -146,32 +153,32 @@ ends.set {
* trimData: trims any adapter or non-host sequences from the data
*/
process trimData {
tag "${repRID
_trimData
}"
publishDir "${logsDir}
/trimData
", mode: '
symlink
', pattern: "\${repRID
_trimData
}.trimData.*"
tag "${repRID}"
publishDir "${logsDir}", mode: '
copy
', pattern: "\${repRID}.trimData.*"
input:
val repRID_trimData
file(fastq) from fastqs
val ends_trimData
output:
path ("*.fq.gz") into fastqs_trimmed
file ("${repRID_trimData}.trimData.log")
file ("${repRID_trimData}.trimData.err")
val ends
file ("${repRID}.trimData.log")
file ("${repRID}.trimData.err")
script:
"""
if [ `nproc` -gt 8 ]
then
ncore=8
else
ncore=`nproc`
fi
hostname >>${repRID}.trimData.err
ulimit -a >>${repRID}.trimData.err
# trim fastqs
if [ '${ends_trimData}' == 'se' ]
then
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID_trimData} -j \${ncore} ${fastq[0]} 1>>${repRID_trimData}.trimData.log 2>>${repRID_trimData}.trimData.err;
ends='se'
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err;
else
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID_trimData} -j \${ncore} ${fastq[0]} ${fastq[1]} 1>>${repRID_trimData}.trimData.log 2>>${repRID_trimData}.trimData.err;
ends='pe'
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err;
fi
"""
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
workflow/tests/test_trimData.py
+
4
−
1
View file @
41a2c611
...
...
@@ -9,7 +9,10 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'
/../../
'
@pytest.mark.trimData
def
test_trimData
():
def
test_trimData
_se
():
assert
os
.
path
.
exists
(
os
.
path
.
join
(
test_output_path
,
'
16-1ZX4_trimmed.fq.gz
'
))
@pytest.mark.trimData
def
test_trimData_pe
():
assert
os
.
path
.
exists
(
os
.
path
.
join
(
test_output_path
,
'
Q-Y5JA_R1_val_1.fq.gz
'
))
assert
os
.
path
.
exists
(
os
.
path
.
join
(
test_output_path
,
'
Q-Y5JA_R2_val_2.fq.gz
'
))
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment