Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
GUDMAP_RBK
RNA-seq
Commits
b32381bd
Commit
b32381bd
authored
Jan 14, 2021
by
Gervaise Henry
🤠
Browse files
Merge resolution
parents
77c4e3fd
ea1722e5
Pipeline
#9019
passed with stages
in 5 minutes and 18 seconds
Changes
6
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
b32381bd
...
...
@@ -705,6 +705,51 @@ integration_pe:
-
always
failAmbiguousSpecies
:
stage
:
integration
only
:
[
merge_requests
]
except
:
variables
:
-
$CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script
:
-
hostname
-
ulimit -a
-
nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload
true
-with-dag dag.png --dev
false
--ci
true
retry
:
max
:
0
when
:
-
always
failTrunkation
:
stage
:
integration
only
:
[
merge_requests
]
except
:
variables
:
-
$CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script
:
-
hostname
-
ulimit -a
-
nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload
true
-with-dag dag.png --dev
false
--ci
true
retry
:
max
:
0
when
:
-
always
failMismatchR1R2
:
stage
:
integration
only
:
[
merge_requests
]
except
:
variables
:
-
$CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script
:
-
hostname
-
ulimit -a
-
nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload
true
-with-dag dag.png --dev
false
--ci
true
retry
:
max
:
0
when
:
-
always
override_inputBag
:
stage
:
integration
only
:
[
merge_requests
]
...
...
docs/dag.png
View replaced file @
77c4e3fd
View file @
b32381bd
3.97 MB
|
W:
|
H:
4.57 MB
|
W:
|
H:
2-up
Swipe
Onion skin
docs/software_versions_mqc.yaml
View file @
b32381bd
...
...
@@ -20,5 +20,5 @@
<dt>deepTools</dt><dd>v3.5.0</dd>
<dt>FastQC</dt><dd>v0.11.9</dd>
<dt>MultiQC</dt><dd>v1.9</dd>
<dt>Pipeline Version</dt><dd>v1.0.
0
</dd>
<dt>Pipeline Version</dt><dd>v1.0.
1
</dd>
</dl>
workflow/nextflow.config
View file @
b32381bd
...
...
@@ -122,6 +122,6 @@ manifest {
homePage
=
'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
description
=
'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
mainScript
=
'rna-seq.nf'
version
=
'v1.0.
0
'
version
=
'v1.0.
1
'
nextflowVersion
=
'>=19.09.0'
}
workflow/rna-seq.nf
View file @
b32381bd
...
...
@@ -47,7 +47,8 @@ deriva.into {
deriva_uploadProcessedFile
deriva_uploadOutputBag
deriva_finalizeExecutionRun
deriva_failPreExecutionRun
deriva_failPreExecutionRun_fastq
deriva_failPreExecutionRun_species
deriva_failExecutionRun
}
bdbag = Channel
...
...
@@ -98,7 +99,8 @@ script_tinHist = Channel.fromPath("${baseDir}/scripts/tin_hist.py")
script_uploadInputBag = Channel.fromPath("${baseDir}/scripts/upload_input_bag.py")
script_uploadExecutionRun_uploadExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadExecutionRun_finalizeExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadExecutionRun_failPreExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadExecutionRun_failPreExecutionRun_fastq = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadExecutionRun_failPreExecutionRun_species = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadExecutionRun_failExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadQC = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
script_uploadOutputBag = Channel.fromPath("${baseDir}/scripts/upload_output_bag.py")
...
...
@@ -448,13 +450,15 @@ strandedMeta.into {
spikeMeta.into {
spikeMeta_checkMetadata
spikeMeta_aggrQC
spikeMeta_failPreExecutionRun
spikeMeta_failPreExecutionRun_fastq
spikeMeta_failPreExecutionRun_species
spikeMeta_failExecutionRun
}
speciesMeta.into {
speciesMeta_checkMetadata
speciesMeta_aggrQC
speciesMeta_failPreExecutionRun
speciesMeta_failPreExecutionRun_fastq
speciesMeta_failPreExecutionRun_species
speciesMeta_failExecutionRun
}
studyRID.into {
...
...
@@ -500,7 +504,7 @@ fastqCountError.into {
fastqCountError_uploadQC
fastqCountError_uploadProcessedFile
fastqCountError_uploadOutputBag
fastqCountError_failPreExecutionRun
fastqCountError_failPreExecutionRun
_fastq
}
fastqReadError.into {
fastqReadError_trimData
...
...
@@ -521,7 +525,7 @@ fastqReadError.into {
fastqReadError_uploadQC
fastqReadError_uploadProcessedFile
fastqReadError_uploadOutputBag
fastqReadError_failPreExecutionRun
fastqReadError_failPreExecutionRun
_fastq
}
/*
...
...
@@ -987,7 +991,7 @@ speciesError.into {
speciesError_uploadQC
speciesError_uploadProcessedFile
speciesError_uploadOutputBag
speciesError_failPreExecutionRun
speciesError_failPreExecutionRun
_species
}
/*
...
...
@@ -1180,7 +1184,8 @@ inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
inputBagRID.into {
inputBagRID_uploadExecutionRun
inputBagRID_finalizeExecutionRun
inputBagRID_failPreExecutionRun
inputBagRID_failPreExecutionRun_fastq
inputBagRID_failPreExecutionRun_species
inputBagRID_failExecutionRun
}
...
...
@@ -2165,32 +2170,30 @@ process finalizeExecutionRun {
}
/*
* failPreExecutionRun: fail the execution run prematurely
* failPreExecutionRun
_fastq
: fail the execution run prematurely
for fastq errors
*/
process failPreExecutionRun {
process failPreExecutionRun
_fastq
{
tag "${repRID}"
input:
path script_uploadExecutionRun_failPreExecutionRun
path credential, stageAs: "credential.json" from deriva_failPreExecutionRun
val spike from spikeMeta_failPreExecutionRun
val species from speciesMeta_failPreExecutionRun
val inputBagRID from inputBagRID_failPreExecutionRun
val fastqCountError from fastqCountError_failPreExecutionRun
path
script_uploadExecutionRun from
script_uploadExecutionRun_failPreExecutionRun
_fastq
path credential, stageAs: "credential.json" from deriva_failPreExecutionRun
_fastq
val spike from spikeMeta_failPreExecutionRun
_fastq
val species from speciesMeta_failPreExecutionRun
_fastq
val inputBagRID from inputBagRID_failPreExecutionRun
_fastq
val fastqCountError from fastqCountError_failPreExecutionRun
_fastq
val fastqCountError_details
val fastqReadError from fastqReadError_failPreExecutionRun
val fastqReadError from fastqReadError_failPreExecutionRun
_fastq
val fastqReadError_details
val speciesError from speciesError_failPreExecutionRun
val speciesError_details
when:
upload
fastqCountError == 'true' || fastqReadError == 'true'
|| speciesError == 'true'
fastqCountError == 'true' || fastqReadError == 'true'
script:
"""
hostname > ${repRID}.failPreExecutionRun.log
ulimit -a >> ${repRID}.failPreExecutionRun.log
hostname > ${repRID}.failPreExecutionRun
_fastq
.log
ulimit -a >> ${repRID}.failPreExecutionRun
_fastq
.log
errorDetails=""
if [ ${fastqCountError} == true ]
...
...
@@ -2199,16 +2202,95 @@ process failPreExecutionRun {
elif [ ${fastqReadError} == true ]
then
# Expand the shell variable errorDetails (parameter expansion) rather than trying to run a command named errorDetails.
errorDetails=\$(echo \${errorDetails}${fastqReadError_details}"\\n")
elif [ ${speciesError} == true ]
fi
echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.failPreExecutionRun_fastq.log
workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
workflow=\${workflow:7:-6}
echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.failPreExecutionRun_fastq.log
if [ "${species}" == "Homo sapiens" ]
then
genomeName=\$(echo GRCh${refHuVersion})
elif [ "${species}" == "Mus musculus" ]
then
genomeName=\$(echo GRCm${refMoVersion})
fi
if [ "${spike}" == "yes" ]
then
genomeName=\$(echo \${genomeName}-S)
fi
echo LOG: searching for genome name - \${genomeName} >> ${repRID}.failPreExecutionRun_fastq.log
genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
genome=\${genome:7:-6}
echo LOG: genome RID extracted - \${genome} >> ${repRID}.failPreExecutionRun_fastq.log
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
echo \${exist} >> ${repRID}.failPreExecutionRun_fastq.log
# Record the failure on the data-hub: the Execution_Run query result decides between the -u F call and the -u RID call.
if [ "\${exist}" == "[]" ]
then
    # The query returned an empty list: call the upload script with -u F (logged below as a new upload).
    rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u F)
    echo LOG: execution run RID uploaded - \${rid} >> ${repRID}.failPreExecutionRun_fastq.log
else
    # A record came back: cut the RID value out of the reply by dropping the 7-character prefix and 6-character suffix of the grep match.
    rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
    rid=\${rid:7:-6}
    echo \${rid} >> ${repRID}.failPreExecutionRun_fastq.log
    # Single '=' so the variable holds only the script output; a doubled '=' stored the value with a leading '='.
    executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u \${rid})
    echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.failPreExecutionRun_fastq.log
fi
dt=`date +%FT%T.%3N%:z`
curl -H 'Content-Type: application/json' -X PUT -d \
'{ \
"ID": "${workflow.sessionId}", \
"ExecutionRunRID": "'\${rid}'", \
"Failure": "'\${dt}'" \
}' \
"https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
"""
}
/*
* failPreExecutionRun_species: fail the execution run prematurely for species error
*/
process failPreExecutionRun_species {
tag "${repRID}"
input:
path script_uploadExecutionRun from script_uploadExecutionRun_failPreExecutionRun_species
path credential, stageAs: "credential.json" from deriva_failPreExecutionRun_species
val spike from spikeMeta_failPreExecutionRun_species
val species from speciesMeta_failPreExecutionRun_species
val inputBagRID from inputBagRID_failPreExecutionRun_species
val speciesError from speciesError_failPreExecutionRun_species
val speciesError_details
// Run only when uploading is enabled AND a species error was flagged.
// The two conditions are joined into a single expression because a when: block
// with several statements is decided by its last statement alone, which would
// leave the upload flag unchecked.
when:
upload && speciesError == 'true'
script:
"""
hostname > ${repRID}.failPreExecutionRun_species.log
ulimit -a >> ${repRID}.failPreExecutionRun_species.log
# Build the error text sent with the failed execution run.
errorDetails=""
if [ ${speciesError} == true ]
then
    # Expand the shell variable errorDetails (parameter expansion) rather than trying to run a command named errorDetails.
    errorDetails=\$(echo \${errorDetails}${speciesError_details}"\\n")
fi
echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.failPreExecutionRun.log
echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.failPreExecutionRun
_species
.log
workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
workflow=\${workflow:7:-6}
echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.failPreExecutionRun.log
echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.failPreExecutionRun
_species
.log
if [ "${species}" == "Homo sapiens" ]
then
...
...
@@ -2221,33 +2303,34 @@ process failPreExecutionRun {
then
genomeName=\$(echo \${genomeName}-S)
fi
echo LOG: searching for genome name - \${genomeName} >> ${repRID}.failPreExecutionRun.log
echo LOG: searching for genome name - \${genomeName} >> ${repRID}.failPreExecutionRun
_species
.log
genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
genome=\${genome:7:-6}
echo LOG: genome RID extracted - \${genome} >> ${repRID}.failPreExecutionRun.log
echo LOG: genome RID extracted - \${genome} >> ${repRID}.failPreExecutionRun
_species
.log
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
echo \${exist} >> ${repRID}.failPreExecutionRun.log
echo \${exist} >> ${repRID}.failPreExecutionRun
_species
.log
if [ "\${exist}" == "[]" ]
then
rid=\$(python3 ${script_uploadExecutionRun
_failPreExecutionRun
} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u F)
echo LOG: execution run RID uploaded - \${rid} >> ${repRID}.failPreExecutionRun.log
rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u F)
echo LOG: execution run RID uploaded - \${rid} >> ${repRID}.failPreExecutionRun
_species
.log
else
rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
rid=\${rid:7:-6}
echo \${rid} >> ${repRID}.failPreExecutionRun.log
executionRun_rid==\$(python3 ${script_uploadExecutionRun
_failPreExecutionRun
} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u \${rid})
echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.failPreExecutionRun.log
echo \${rid} >> ${repRID}.failPreExecutionRun
_species
.log
# Single '=' so the variable holds only the script output; a doubled '=' stored the value with a leading '='.
executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u \${rid})
echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.failPreExecutionRun
_species
.log
fi
dt=`date +%FT%T.%3N%:z`
curl -H 'Content-Type: application/json' -X PUT -d \
'{ \
"ID": "${workflow.sessionId}", \
"ExecutionRunRID": "'\${rid}'", \
"Failure": "'\${dt}'" \
}' \
"https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
...
...
@@ -2339,6 +2422,7 @@ process failExecutionRun {
curl -H 'Content-Type: application/json' -X PUT -d \
'{ \
"ID": "${workflow.sessionId}", \
"ExecutionRunRID": "'\${rid}'", \
"Failure": "'\${dt}'" \
}' \
"https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
...
...
workflow/scripts/get_updated_rep_count.sh
View file @
b32381bd
...
...
@@ -3,19 +3,34 @@
echo
"collecting stats for badges"
latest_release_tag
=
$(
git tag
--sort
=
-committerdate
-l
*
.
*
.
*
|
head
-1
)
current_pipeline_version
=
$(
git show
${
latest_release_tag
}
:workflow/nextflow.config |
grep
-o
version.
*
|
grep
-oP
"(?<=').*(?=')"
)
current_pipeline_versionMajor
=
$(
echo
${
current_pipeline_version
}
|
cut
-f1
-d
"."
)
current_pipeline_versionMajor
=
$(
echo
${
current_pipeline_versionMajor
}
"."
)
echo
"Major pipeline version for search: "
${
current_pipeline_versionMajor
}
echo
"collecting workflow RIDs from servers"
dev_workflow_RID
=
$(
curl
-s
https://dev.gudmap.org/ermrest/catalog/2/entity/RNASeq:Workflow/Version
=
${
current_pipeline_version
}
|
grep
-o
'\"RID\":\".*\",\"RCT'
)
dev_workflow_RID
=
${
dev_workflow_RID
:7:-6
}
staging_workflow_RID
=
$(
curl
-s
https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Workflow/Version
=
${
current_pipeline_version
}
|
grep
-o
'\"RID\":\".*\",\"RCT'
)
staging_workflow_RID
=
${
staging_workflow_RID
:7:-6
}
prod_workflow_RID
=
$(
curl
-s
https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Workflow/Version
=
${
current_pipeline_version
}
|
grep
-o
'\"RID\":\".*\",\"RCT'
)
prod_workflow_RID
=
${
prod_workflow_RID
:7:-6
}
dev_workflow_RID
=
$(
curl
-s
https://dev.gudmap.org/ermrest/catalog/2/entity/RNASeq:Workflow/Version::ciregexp::%5E
${
current_pipeline_versionMajor
}
|
grep
-o
'\"RID\":\".*\",\"RCT'
|
cut
-f4
-d
"
\"
"
)
staging_workflow_RID
=
$(
curl
-s
https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Workflow/Version::ciregexp::%5E
${
current_pipeline_versionMajor
}
|
grep
-o
'\"RID\":\".*\",\"RCT'
|
cut
-f4
-d
"
\"
"
)
prod_workflow_RID
=
$(
curl
-s
https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Workflow/Version::ciregexp::%5E
${
current_pipeline_versionMajor
}
|
grep
-o
'\"RID\":\".*\",\"RCT'
|
cut
-f4
-d
"
\"
"
)
echo
"collecting unique replicates with successful execution runs"
dev_count
=
$(
curl
-s
https://dev.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Execution_Status
=
Success/Workflow
=
${
dev_workflow_RID
}
|
grep
-o
\"
Replicate
\"
.
*
,
\"
Workflow |
grep
-oP
"(?<=
\"
Replicate
\"
:
\"
).*(?=
\"
,
\"
Workflow)"
|
sort
|
uniq
|
wc
-l
)
staging_count
=
$(
curl
-s
https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Execution_Status
=
Success/Workflow
=
${
staging_workflow_RID
}
|
grep
-o
\"
Replicate
\"
.
*
,
\"
Workflow |
grep
-oP
"(?<=
\"
Replicate
\"
:
\"
).*(?=
\"
,
\"
Workflow)"
|
sort
|
uniq
|
wc
-l
)
prod_count
=
$(
curl
-s
https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Execution_Status
=
Success/Workflow
=
${
prod_workflow_RID
}
|
grep
-o
\"
Replicate
\"
.
*
,
\"
Workflow |
grep
-oP
"(?<=
\"
Replicate
\"
:
\"
).*(?=
\"
,
\"
Workflow)"
|
sort
|
uniq
|
wc
-l
)
dev_count
=
0
for
rid
in
${
dev_workflow_RID
}
do
temp_count
=
$(
curl
-s
https://dev.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Execution_Status
=
Success/Workflow
=
${
rid
}
|
grep
-o
\"
Replicate
\"
.
*
,
\"
Workflow |
grep
-oP
"(?<=
\"
Replicate
\"
:
\"
).*(?=
\"
,
\"
Workflow)"
|
sort
|
uniq
|
wc
-l
)
dev_count
=
$(
expr
${
dev_count
}
+
${
temp_count
}
)
done
staging_count
=
0
for
rid
in
${
staging_workflow_RID
}
do
temp_count
=
$(
curl
-s
https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Execution_Status
=
Success/Workflow
=
${
rid
}
|
grep
-o
\"
Replicate
\"
.
*
,
\"
Workflow |
grep
-oP
"(?<=
\"
Replicate
\"
:
\"
).*(?=
\"
,
\"
Workflow)"
|
sort
|
uniq
|
wc
-l
)
staging_count
=
$(
expr
${
staging_count
}
+
${
temp_count
}
)
done
prod_count
=
0
for
rid
in
${
prod_workflow_RID
}
do
temp_count
=
$(
curl
-s
https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Execution_Status
=
Success/Workflow
=
${
rid
}
|
grep
-o
\"
Replicate
\"
.
*
,
\"
Workflow |
grep
-oP
"(?<=
\"
Replicate
\"
:
\"
).*(?=
\"
,
\"
Workflow)"
|
sort
|
uniq
|
wc
-l
)
prod_count
=
$(
expr
${
prod_count
}
+
${
temp_count
}
)
done
echo
"collecting badges"
mkdir
-p
./badges/counts
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment