From 710a67d9f766777667679e24af93212ea764a928 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Sat, 18 Jan 2020 17:55:28 -0600
Subject: [PATCH] Add checks for meta repRID

---
 workflow/conf/aws_ondemand.config |  7 -------
 workflow/conf/aws_spot.config     |  7 -------
 workflow/conf/biohpc.config       |  6 +++---
 workflow/nextflow.config          | 15 +++++++++++++++
 workflow/rna-seq.nf               |  1 +
 workflow/scripts/parseMeta.py     | 17 +++++++++++------
 6 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config
index 1a14ebf..84fcb27 100755
--- a/workflow/conf/aws_ondemand.config
+++ b/workflow/conf/aws_ondemand.config
@@ -13,14 +13,7 @@ process {
   cpus = 1
   memory = '1 GB'
 
-  withName:getBag {
-    container = 'bicf/gudmaprbkfilexfer:1.3'
-  }
-  withName:getData {
-    container = 'bicf/gudmaprbkfilexfer:1.3'
-  }
   withName:trimData {
-    container = 'bicf/trimgalore:1.1'
     cpus = 15
   }
 }
\ No newline at end of file
diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config
index b5239a2..fbccb3c 100755
--- a/workflow/conf/aws_spot.config
+++ b/workflow/conf/aws_spot.config
@@ -13,14 +13,7 @@ process {
   cpus = 1
   memory = '1 GB'
 
-  withName:getBag {
-    container = 'bicf/gudmaprbkfilexfer:1.3'
-  }
-  withName:getData {
-    container = 'bicf/gudmaprbkfilexfer:1.3'
-  }
   withName:trimData {
-    container = 'bicf/trimgalore:1.1'
     cpus = 15
   }
 }
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 20da91a..36d5b33 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -5,16 +5,16 @@ process {
 
   withName:getBag {
     executor = 'local'
-    container = 'docker://bicf/gudmaprbkfilexfer:1.3'
   }
   withName:getData {
     executor = 'local'
-    container = 'docker://bicf/gudmaprbkfilexfer:1.3'
   }
   withName:trimData {
-    container = 'docker://bicf/trimgalore:1.1'
     queue = '256GB,256GBv1,384GB'
   }
+  withName:parseMetadata {
+    executor = 'local'
+  }
 }
 
 singularity {
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index 3758499..2635a79 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -10,6 +10,21 @@ profiles {
   }
 }
 
+process {  // container image per process name; sits outside `profiles`, so presumably shared by every profile
+  withName:getBag {
+    container = 'bicf/gudmaprbkfilexfer:1.3'
+  }
+  withName:getData {
+    container = 'bicf/gudmaprbkfilexfer:1.3'
+  }
+  withName:trimData {
+    container = 'bicf/trimgalore:1.1'
+  }
+  withName:parseMetadata {
+    container = 'bicf/python:1.3'
+  }
+}
+
 trace {
   enabled = true
   file = 'pipeline_trace.txt'
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 6e4199d..a10a29a 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -111,6 +111,7 @@ process parseMetadata {
     """
     hostname >>${repRID_parseMetadata}.parseMetadata.err
     ulimit -a >>${repRID_parseMetadata}.parseMetadata.err
+    python ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m ${fileMeta} -p repRID
     """
 
 }
diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py
index 115dd6d..bd872fc 100644
--- a/workflow/scripts/parseMeta.py
+++ b/workflow/scripts/parseMeta.py
@@ -14,14 +14,19 @@ def get_args():
 
 def main():
+    """Check the metadata CSV named by args.metaFile; print the problem and exit with status 1 if a check fails."""
     args = get_args()
-    metaFile = pd.read_csv(args.metaFile,sep="\t",header=None)
+    metaFile = pd.read_csv(args.metaFile,sep=",",header=0)  # comma-separated; first row holds the column names
     if (args.parameter == "repRID"):
         if (len(metaFile.Replicate_RID.unique()) > 1):
-            #ERROR
-        if not (metaFile.Replicate_RID == arg$repRID):
-            #ERROR
-        if (len(fileFile[fileFile["File_Type"] == "FastQ"].RID) > 2):
-            #ERROR
+            print("There are multiple replicate RID's in the metadata: " + " ".join(metaFile.Replicate_RID.unique()))
+            raise SystemExit(1)
+        if (list(metaFile.Replicate_RID.unique()) != [args.repRID]):  # plain list compare: no array truth-value, safe with zero rows
+            print("Replicate RID in metadata does not match run parameters: " + " ".join(metaFile.Replicate_RID.unique()) + " vs " + args.repRID)
+            raise SystemExit(1)
+        print(metaFile["Replicate_RID"].unique())  # reached only when the single RID matches args.repRID
+        if (len(metaFile[metaFile["File_Type"] == "FastQ"]) > 2):
+            print("There are more than 2 fastq's in the metadata: " + " ".join(metaFile[metaFile["File_Type"] == "FastQ"].RID))
+            raise SystemExit(1)
 
 
 if __name__ == '__main__':
-- 
GitLab