diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7c306463db48b35e0f78d1ba31d46960686b01c9..3a60d00c8189145dc5c6b8bbbffa70af63cab17a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -107,13 +107,14 @@ parseMetadata:
   - rep=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
   - exp=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
   - study=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
-  - endsMeta=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
+  - endsRaw=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
+  - endsMeta="uk"
   - endsManual=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
   - stranded=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
   - spike=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
   - species=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
   - readLength=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p readLength)
-  - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
+  - echo -e "${endsMeta},${endsRaw},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
   - pytest -m parseMetadata
   artifacts:
     name: "$CI_JOB_NAME"
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index c7da457756aa5fd0bf18538b88456d93fddf798e..f1334a3f37c35de495dc96279f3cf57b128ad1b9 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -310,8 +310,21 @@ process parseMetadata {
     echo -e "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log
 
     # get endedness metadata
-    endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta)
-    echo -e "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log
+    endsRaw=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta)
+    echo -e "LOG: endedness metadata parsed: \${endsRaw}" >> ${repRID}.parseMetadata.log
+    if [ "\${endsRaw}" == "Single Read" ] || [ "\${endsRaw}" == "Single End" ]
+    then
+      endsMeta="se"  # Single End is the spelling parse_meta.py matched before this change
+    elif [ "\${endsRaw}" == "Paired End" ]
+    then
+      endsMeta="pe"
+    else
+      endsMeta="uk"  # same fallback code parse_meta.py emitted before this change
+    fi
+    if [ "\${endsRaw}" == "" ]
+    then
+      endsRaw="_No value_"  # placeholder for a missing raw value
+    fi
 
     # ganually get endness
     endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p endsManual)
@@ -344,18 +357,18 @@ process parseMetadata {
     then
       fastqCountError=true
       fastqCountError_details="Too many fastqs detected (>2)"
-    elif [ "\${endsMeta}"" == "Single Read" ] && [ "${fastqCount}" -ne "1" ]
+    elif [ "\${endsMeta}" == "se" ] && [ "${fastqCount}" -ne "1" ]
     then
       fastqCountError=true
       fastqCountError_details="Number of fastqs detected does not match submitted endness"
-    elif [ "\${endsMeta}"" == "Paired End" ] && [ "${fastqCount}" -ne "2" ]
+    elif [ "\${endsMeta}" == "pe" ] && [ "${fastqCount}" -ne "2" ]
     then
       fastqCountError=true
       fastqCountError_details="Number of fastqs detected does not match submitted endness"
     fi
 
     # save design file
-    echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
+    echo -e "\${endsMeta},\${endsRaw},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
 
     # save fastq count error file
     echo -e "\${fastqCountError},\${fastqCountError_details}" > fastqCountError.csv
@@ -364,6 +377,7 @@ process parseMetadata {
 
 // Split metadata into separate channels
 endsMeta = Channel.create()
+endsRaw = Channel.create()
 endsManual = Channel.create()
 strandedMeta = Channel.create()
 spikeMeta = Channel.create()
@@ -373,6 +387,7 @@ expRID = Channel.create()
 studyRID = Channel.create()
 metadata_fl.splitCsv(sep: ",", header: false).separate(
   endsMeta,
+  endsRaw,
   endsManual,
   strandedMeta,
   spikeMeta,
@@ -1939,6 +1954,7 @@ process failExecutionRun {
     val executionRunRID from executionRunRID_failExecutionRun
     val inputBagRID from inputBagRID_failExecutionRun
     val endsMeta from endsMeta_failExecutionRun
+    val endsRaw
     val strandedMeta from strandedMeta_failExecutionRun
     val spikeMeta from spikeMeta_failExecutionRun
     val speciesMeta from speciesMeta_failExecutionRun
@@ -1985,15 +2001,6 @@ process failExecutionRun {
     pipelineError_details=\$(echo \${pipelineError_details}"|:-:|-:|-:|\\n")
     if ${pipelineError_ends}
     then
-      if [ "${endsMeta}" == "se" ]
-      then
-        endMeta="Single End"
-      elif [ "${endsMeta}" == "pe" ]
-      then
-        endMeta="Paired End"
-      else
-        endMeta="unknown"
-      fi
       if [ "${endsInfer}" == "se" ]
       then
         endInfer="Single End"
@@ -2003,7 +2010,7 @@ process failExecutionRun {
       else
         endInfer="unknown"
       fi
-      pipelineError_details=\$(echo \${pipelineError_details}"|Paired End|"\${endMeta}"|"\${endInfer}"|\\n")
+      pipelineError_details=\$(echo \${pipelineError_details}"|Paired End|${endsRaw}|"\${endInfer}"|\\n")
     fi
     if ${pipelineError_stranded}
     then
diff --git a/workflow/scripts/parse_meta.py b/workflow/scripts/parse_meta.py
index 16411df357555991fefdcbd65a6cf0f1f0667017..51a8105f997aab376afe568b58877fb300ea5022 100644
--- a/workflow/scripts/parse_meta.py
+++ b/workflow/scripts/parse_meta.py
@@ -62,12 +62,7 @@ def main():
 
     # Get endedness metadata from 'Experiment Settings.csv'
     if (args.parameter == "endsMeta"):
-        if (metaFile.Paired_End.unique() == "Single End"):
-            endsMeta = "se"
-        elif (metaFile.Paired_End.unique() == "Paired End"):
-            endsMeta = "pe"
-        else:
-            endsMeta = "uk"
+        endsMeta = metaFile.Paired_End.fillna("").unique()[0]  # raw scalar, "" if missing
         print(endsMeta)
 
     # Manually get endness count from 'File.csv'
diff --git a/workflow/tests/test_parseMetadata.py b/workflow/tests/test_parseMetadata.py
index fa488800e4b6aeb5b1be2685d75b2801667f0855..5a14fcd885b79d944e46de5d936d17fc941def7b 100644
--- a/workflow/tests/test_parseMetadata.py
+++ b/workflow/tests/test_parseMetadata.py
@@ -19,7 +19,7 @@ def readLine(fileName):
     data = False
     file = open(fileName, "r")
     line = file.readline()
-    if line.strip() == "uk,se,unstranded,no,Homo sapiens,75,Experiment_RID,Study_RID,Replicate_RID":
+    if line.strip() == "uk,uk,se,unstranded,no,Homo sapiens,75,Experiment_RID,Study_RID,Replicate_RID":
         data = True
 
     return data