From b9798355324f44a4a7d529aae4b1a7f842dd7bdc Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Mon, 25 Jan 2021 12:24:51 -0600
Subject: [PATCH] Convert strandedness from yes/no to t/f

---
 CHANGELOG.md                   |  1 +
 workflow/rna-seq.nf            | 25 ++++++++++++++++++++++---
 workflow/scripts/parse_meta.py |  7 +------
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 637e6d9..9eb41cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 # v2.0.0
 **User Facing**
 * Endness metadata "Single Read" changed to "Single End" in data-hub, pipeline updated to handle (#110) ("Single Read" still acceptable for backwards compatibility)
+* Spike-in metadata "yes"/"no" changed to boolean "t"/"f" in data-hub, pipeline updated to handle (#70) ("yes"/"no" still acceptable for backwards compatibility)
 
 **Background**
 * Add memory limit (75%) per thread for samtools sort (#108)
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index bce0d20..57ec3ac 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -367,8 +367,27 @@ process parseMetadata {
     echo -e "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log
 
     # get spike-in metadata
-    spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike)
+    spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike)
     echo -e "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log
+    if [ "\${spike}" == "f" ] # normalize the parsed spike-in value to "true"/"false"
+    then
+      spike="false"
+    elif [ "\${spike}" == "t" ]
+    then
+      spike="true"
+    elif [ "\${spike}" == "no" ]
+    # "yes"/"no" deprecated as of Jan 2021, this option is present for backwards compatibility
+    then
+      spike="false"
+    elif [ "\${spike}" == "yes" ]
+    # "yes"/"no" deprecated as of Jan 2021, this option is present for backwards compatibility
+    then
+      spike="true"
+    elif [ "\${spike}" == "nan" ]
+    then
+      endsRaw="_No value_" # NOTE(review): endness variables are set in the spike-in branch; looks copy-pasted, confirm intended targets
+      endsMeta="NA"
+    fi
 
     # get species metadata
     species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species)
@@ -935,9 +954,9 @@ process inferMetadata {
     # determine spike-in
     if [ 1 -eq \$(echo \$(expr \${align_ercc} ">=" 10)) ]
     then
-      spike="yes"
+      spike="true"
     else
-      spike="no"
+      spike="false"
     fi
     echo -e "LOG: inference of strandedness results is: \${spike}" >> ${repRID}.inferMetadata.log
 
diff --git a/workflow/scripts/parse_meta.py b/workflow/scripts/parse_meta.py
index fdbc86c..b5379bb 100644
--- a/workflow/scripts/parse_meta.py
+++ b/workflow/scripts/parse_meta.py
@@ -73,12 +73,7 @@ def main():
 
     # Get spike-in metadata from 'Experiment Settings.csv'
     if (args.parameter == "spike"):
-        if (metaFile.Used_Spike_Ins.unique() == "yes"):
-            spike = "yes"
-        elif (metaFile.Used_Spike_Ins.unique() == "no"):
-            spike = "no"
-        else:
-            spike = metaFile.Used_Spike_Ins.unique()[0]
+        spike = metaFile.Used_Spike_Ins.unique()[0]
         print(spike)
 
     # Get species metadata from 'Experiment.csv'
-- 
GitLab