From e8b92b99929afd08adfdd5f0d1c2fe857d6e0562 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Mon, 3 Aug 2020 14:23:28 -0500
Subject: [PATCH] Extract read length from submitter metadata

---
 .gitlab-ci.yml                       | 3 ++-
 workflow/rna-seq.nf                  | 8 +++++++-
 workflow/scripts/parseMeta.py        | 5 +++++
 workflow/tests/test_parseMetadata.py | 2 +-
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b36504f..29f4925 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -38,7 +38,8 @@ parseMetadata:
   - stranded=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
   - spike=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
   - species=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
-  - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${exp},${study},${rep}" > design.csv
+  - readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
+  - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
   - pytest -m parseMetadata
 
 inferMetadata:
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 2e1e2b2..1443869 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -237,8 +237,12 @@ process parseMetadata {
     species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species)
     echo -e "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log
 
+    # get read length metadata
+    readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p readLength)
+    echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
+
     # gave design file
-    echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${exp},\${study}" > design.csv
+    echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
     """
 }
 
@@ -248,6 +252,7 @@ endsManual = Channel.create()
 strandedMeta = Channel.create()
 spikeMeta = Channel.create()
 speciesMeta = Channel.create()
+readLengthMeta = Channel.create()
 expRID = Channel.create()
 studyRID = Channel.create()
 metadata.splitCsv(sep: ",", header: false).separate(
@@ -256,6 +261,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
   strandedMeta,
   spikeMeta,
   speciesMeta,
+  readLengthMeta,
   expRID,
   studyRID
 )
diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py
index fd73cf3..453e418 100644
--- a/workflow/scripts/parseMeta.py
+++ b/workflow/scripts/parseMeta.py
@@ -102,5 +102,10 @@ def main():
             exit(1)
         print(species)
 
+    # Get read length metadata from 'Experiment Settings.csv' (emit the bare value, e.g. 75, not numpy's '[75]' repr)
+    if (args.parameter == "readLength"):
+        readLength = metaFile.Read_Length.unique()
+        print(str(readLength).strip('[]'))
+
 if __name__ == '__main__':
     main()
diff --git a/workflow/tests/test_parseMetadata.py b/workflow/tests/test_parseMetadata.py
index 31a9674..59677bb 100644
--- a/workflow/tests/test_parseMetadata.py
+++ b/workflow/tests/test_parseMetadata.py
@@ -17,7 +17,7 @@ def readLine(fileName):
     data = False
     file = open(fileName, "r")
     line = file.readline()
-    if line.strip() == "uk,se,unstranded,no,Homo sapiens,Experiment_RID,Study_RID,Replicate_RID":
+    if line.strip() == "uk,se,unstranded,no,Homo sapiens,75,Experiment_RID,Study_RID,Replicate_RID":
         data = True
 
     return data
-- 
GitLab