Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
RNA-seq
Manage
Activity
Members
Labels
Plan
Issues
12
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
GUDMAP_RBK
RNA-seq
Commits
7148b6f2
Commit
7148b6f2
authored
4 years ago
by
Gervaise Henry
Browse files
Options
Downloads
Patches
Plain Diff
Add check for number of fastqs
parent
c3c32ba8
2 merge requests
!58
Develop
,
!56
Resolve "Detect error in inferMetadata for tracking"
Pipeline
#8781
passed with stages
in 6 minutes and 27 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
workflow/rna-seq.nf
+93
-9
93 additions, 9 deletions
workflow/rna-seq.nf
with
93 additions
and
9 deletions
workflow/rna-seq.nf
+
93
−
9
View file @
7148b6f2
...
...
@@ -151,7 +151,7 @@ Development : ${params.dev}
"""
/*
*
splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate rid
*
getBag: download input bag
*/
process getBag {
tag "${repRID}"
...
...
@@ -206,7 +206,7 @@ inputBag.into {
}
/*
* getData: fetch
study
files from consortium with downloaded bdbag.zip
* getData: fetch
replicate
files from consortium with downloaded bdbag.zip
*/
process getData {
tag "${repRID}"
...
...
@@ -221,6 +221,7 @@ process getData {
path ("**/File.csv") into fileMeta
path ("**/Experiment Settings.csv") into experimentSettingsMeta
path ("**/Experiment.csv") into experimentMeta
path "fastqCount.csv" into fastqCount_fl
script:
"""
...
...
@@ -246,9 +247,18 @@ process getData {
echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
sh ${script_bdbagFetch} \${replicate::-13} ${repRID}
echo -e "LOG: fetched" >> ${repRID}.getData.log
fastqCount=\$(ls *.fastq.gz | wc -l)
echo -e \${fastqCount} > fastqCount.csv
"""
}
// Split fastq count into channel
fastqCount = Channel.create()
fastqCount_fl.splitCsv(sep: ",", header: false).separate(
fastqCount
)
// Set raw fastq to downloaded or forced input and replicate them for multiple process inputs
if (fastqsForce != "") {
Channel
...
...
@@ -274,9 +284,11 @@ process parseMetadata {
path file from fileMeta
path experimentSettings, stageAs: "ExperimentSettings.csv" from experimentSettingsMeta
path experiment from experimentMeta
val fastqCount
output:
path "design.csv" into metadata_fl
path "fastqCountError.csv" into fastqCountError_fl
script:
"""
...
...
@@ -323,8 +335,28 @@ process parseMetadata {
fi
echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
# check for incorrect number of fastqs
fastqCountError=false
fastqCountError_details=""
if [ ${fastqCount} > 2 ]
then
fastqCountError=true
fastqCountError_details="Too many fastqs detected (>2)"
elif [ "\${endsMeta}"" == "Single Read" ] && [ ${fastqCount} != 1 ]
then
fastqCountError=true
fastqCountError_details="Number of fastqs detected does not match submitted endness"
elif [ "\${endsMeta}"" == "Paired End" ] && [ ${fastqCount} != 2 ]
then
fastqCountError=true
fastqCountError_details="Number of fastqs detected does not match submitted endness"
fi
# save design file
echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
# save fastq count error file
echo -e "\${fastqCountError},\${fastqCountError_details}" > fastqCountError.csv
"""
}
...
...
@@ -386,6 +418,30 @@ expRID.into {
expRID_uploadProcessedFile
}
// Split fastq count error into separate channel
fastqCountError = Channel.create()
fastqCountError_details = Channel.create()
fastqCountError_fl.splitCsv(sep: ",", header: false).separate(
fastqCountError,
fastqCountError_details
)
// Replicate errors for multiple process inputs
fastqCountError.into {
fastqCountError_getRef
fastqCountError_alignData
fastqCountError_dedupData
fastqCountError_makeBigWig
fastqCountError_countData
fastqCountError_fastqc
fastqCountError_dataQC
fastqCountError_aggrQC
fastqCountError_uploadQC
fastqCountError_uploadProcessedFile
fastqCountError_uploadOutputBag
fastqCountError_finalizeExecutionRun
}
/*
* trimData: trims any adapter or non-host sequences from the data
*/
...
...
@@ -879,7 +935,7 @@ checkMetadata_fl.splitCsv(sep: ",", header: false).separate(
pipelineError_species
)
//
Replicate errors for multiple process inputs
// Replicate errors for multiple process inputs
pipelineError.into {
pipelineError_getRef
pipelineError_alignData
...
...
@@ -1054,12 +1110,14 @@ process getRef {
path credential, stageAs: "credential.json" from deriva_getRef
val spike from spikeInfer_getRef
val species from speciesInfer_getRef
val fastqCountError_getRef
val pipelineError_getRef
output:
tuple path ("hisat2", type: 'dir'), path ("*.bed"), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv") into reference
when:
fastqCountError_getRef == "false"
pipelineError_getRef == "false"
script:
...
...
@@ -1148,6 +1206,7 @@ process alignData {
path reference_alignData
val ends from endsInfer_alignData
val stranded from strandedInfer_alignData
val fastqCountError_alignData
val pipelineError_alignData
output:
...
...
@@ -1155,6 +1214,7 @@ process alignData {
path ("*.alignSummary.txt") into alignQC
when:
fastqCountError_alignData == "false"
pipelineError_alignData == "false"
script:
...
...
@@ -1219,6 +1279,7 @@ process dedupData {
input:
tuple path (bam), path (bai) from rawBam_dedupData
val fastqCountError_dedupData
val pipelineError_dedupData
output:
...
...
@@ -1227,6 +1288,7 @@ process dedupData {
path ("*.deduped.Metrics.txt") into dedupQC
when:
fastqCountError_dedupData == 'false'
pipelineError_dedupData == 'false'
script:
...
...
@@ -1272,12 +1334,14 @@ process makeBigWig {
input:
tuple path (bam), path (bai) from dedupBam_makeBigWig
val fastqCountError_makeBigWig
val pipelineError_makeBigWig
output:
path ("${repRID}_sorted.deduped.bw") into bigwig
when:
fastqCountError_makeBigWig == 'false'
pipelineError_makeBigWig == 'false'
script:
...
...
@@ -1306,6 +1370,7 @@ process countData {
path ref from reference_countData
val ends from endsInfer_countData
val stranded from strandedInfer_countData
val fastqCountError_countData
val pipelineError_countData
output:
...
...
@@ -1314,6 +1379,7 @@ process countData {
path ("assignedReads.csv") into assignedReadsInfer_fl
when:
fastqCountError_countData == 'false'
pipelineError_countData == 'false'
script:
...
...
@@ -1381,6 +1447,7 @@ process fastqc {
input:
path (fastq) from fastqs_fastqc
val fastqCountError_fastqc
val pipelineError_fastqc
output:
...
...
@@ -1388,6 +1455,7 @@ process fastqc {
path ("rawReads.csv") into rawReadsInfer_fl
when:
fastqCountError_fastqc == 'false'
pipelineError_fastqc == 'false'
script:
...
...
@@ -1428,6 +1496,7 @@ process dataQC {
tuple path (bam), path (bai) from dedupBam_dataQC
tuple path (chrBam), path (chrBai) from dedupChrBam
val ends from endsInfer_dataQC
val fastqCountError_dataQC
val pipelineError_dataQC
output:
...
...
@@ -1436,6 +1505,7 @@ process dataQC {
path "${repRID}_insertSize.inner_distance_freq.txt" into innerDistance
when:
fastqCountError_dataQC == 'false'
pipelineError_dataQC == 'false'
script:
...
...
@@ -1512,6 +1582,7 @@ process aggrQC {
val tinMedI from tinMedInfer
val studyRID from studyRID_aggrQC
val expRID from expRID_aggrQC
val fastqCountError_aggrQC
val pipelineError_aggrQC
output:
...
...
@@ -1519,6 +1590,7 @@ process aggrQC {
path "${repRID}.multiqc_data.json" into multiqcJSON
when:
fastqCountError_aggrQC == 'false'
pipelineError_aggrQC == 'false'
script:
...
...
@@ -1603,6 +1675,7 @@ process uploadQC {
val length from readLengthInfer_uploadQC
val rawCount from rawReadsInfer_uploadQC
val finalCount from assignedReadsInfer_uploadQC
val fastqCountError_uploadQC
val pipelineError_uploadQC
output:
...
...
@@ -1610,6 +1683,7 @@ process uploadQC {
when:
upload
fastqCountError_uploadQC == 'false'
pipelineError_uploadQC == 'false'
script:
...
...
@@ -1673,6 +1747,7 @@ process uploadProcessedFile {
val studyRID from studyRID_uploadProcessedFile
val expRID from expRID_uploadProcessedFile
val executionRunRID from executionRunRID_uploadProcessedFile
val fastqCountError_uploadProcessedFile
val pipelineError_uploadProcessedFile
output:
...
...
@@ -1680,6 +1755,7 @@ process uploadProcessedFile {
when:
upload
fastqCountError_uploadProcessedFile == 'false'
pipelineError_uploadProcessedFile == 'false'
script:
...
...
@@ -1759,6 +1835,7 @@ process uploadOutputBag {
path outputBag
val studyRID from studyRID_uploadOutputBag
val executionRunRID from executionRunRID_uploadOutputBag
val fastqCountError_uploadOutputBag
val pipelineError_uploadOutputBag
output:
...
...
@@ -1766,6 +1843,7 @@ process uploadOutputBag {
when:
upload
fastqCountError_uploadOutputBag == 'false'
pipelineError_uploadOutputBag == 'false'
script:
...
...
@@ -1805,12 +1883,11 @@ process uploadOutputBag {
}
// Extract output bag RID into channel
outputBagRID = Channel.value()
outputBagRID_temp = Channel.create()
outputBagRID = Channel.create()
outputBagRID_fl.splitCsv(sep: ",", header: false).separate(
outputBagRID
_temp
outputBagRID
)
outputBagRID
= outputBagRID_temp
outputBagRID
.ifEmpty(false)
/*
* finalizeExecutionRun: finalizes the execution run
...
...
@@ -1832,6 +1909,8 @@ process finalizeExecutionRun {
val strandedInfer from strandedInfer_finalizeExecutionRun
val spikeInfer from spikeInfer_finalizeExecutionRun
val speciesInfer from speciesInfer_finalizeExecutionRun
val fastqCountError from fastqCountError_finalizeExecutionRun
val fastqCountError_details
val pipelineError from pipelineError_finalizeExecutionRun
val pipelineError_ends
val pipelineError_stranded
...
...
@@ -1853,11 +1932,16 @@ process finalizeExecutionRun {
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
if [ ${pipelineError} == false ]
if [
${fastqCountError} == false ] && [
${pipelineError} == false ]
then
rid=\$(python3 ${script_uploadExecutionRun_finalizeExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Success -d 'Run Successful' -o ${source} -c \${cookie} -u ${executionRunRID})
echo LOG: execution run RID marked as successful - \${rid} >> ${repRID}.finalizeExecutionRun.log
else
elif [ ${fastqCountError} == true ]
then
rid=\$(python3 ${script_uploadExecutionRun_finalizeExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "${fastqCountError_details}" -o ${source} -c \${cookie} -u ${executionRunRID})
echo LOG: execution run RID marked as error - \${rid} >> ${repRID}.finalizeExecutionRun.log
elif [ ${pipelineError} == true ]
then
pipelineError_details=\$(echo "**Submitted metadata does not match infered:** ")
if ${pipelineError_ends}
then
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment