From 158a687eea1e96614365e392a9d729808aa6cb01 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Sat, 8 Aug 2020 21:15:34 -0500
Subject: [PATCH] Add scripts to run from study RID

---
 .gitignore                     |  2 ++
 cleanup.sh                     |  2 ++
 workflow/scripts/splitStudy.py | 24 ++++++++++++++++++++++++
 workflow/scripts/splitStudy.sh | 17 +++++++++++++++++
 4 files changed, 45 insertions(+)
 create mode 100644 workflow/scripts/splitStudy.py
 create mode 100644 workflow/scripts/splitStudy.sh

diff --git a/.gitignore b/.gitignore
index f500ef7..1228878 100644
--- a/.gitignore
+++ b/.gitignore
@@ -297,6 +297,8 @@ timeline*.html*
 *.tmp
 *.swp
 *.out
+*_studyRID.json
+*_studyRID.csv
 run*.sh
 
 !.gitkeep
diff --git a/cleanup.sh b/cleanup.sh
index 9569ff5..aa28920 100644
--- a/cleanup.sh
+++ b/cleanup.sh
@@ -5,3 +5,5 @@ rm timeline*.html*
 rm .nextflow*.log*
 rm -r .nextflow/
 rm -r work/
+rm *_studyRID.json
+rm *_studyRID.csv
diff --git a/workflow/scripts/splitStudy.py b/workflow/scripts/splitStudy.py
new file mode 100644
index 0000000..82ffc28
--- /dev/null
+++ b/workflow/scripts/splitStudy.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+import argparse
+import pandas as pd
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)  # suppress every FutureWarning for the whole run
+
+def get_args():
+    """Parse the command line and return the namespace holding the required study RID."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument('-s', '--studyRID', required=True, help="The study RID.")
+    return cli.parse_args()
+
+def main():
+    """Read <studyRID>_studyRID.json and write its "RID" column to <studyRID>_studyRID.csv (no header, no index)."""
+    study = get_args().studyRID
+    replicates = pd.read_json(study + "_studyRID.json")
+    # An empty JSON list parses to a frame with no columns, so "RID" must be checked before it is indexed.
+    if "RID" not in replicates.columns or replicates["RID"].count() == 0:
+        raise Exception("No associated replicates found: %s" % study)  # name the study, not the frame
+    replicates["RID"].to_csv(study + "_studyRID.csv", header=False, index=False)
+
+if __name__ == '__main__':  # run only when executed as a script, not on import
+    main()
diff --git a/workflow/scripts/splitStudy.sh b/workflow/scripts/splitStudy.sh
new file mode 100644
index 0000000..a64b6d9
--- /dev/null
+++ b/workflow/scripts/splitStudy.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Usage: splitStudy.sh <study RID>  -- run the RNA-seq pipeline once per replicate RID of the given study
+
+# query GUDMAP/RBK for study RID (curl is invoked directly so the RID is not re-parsed by a second shell)
+curl --location --request GET "https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Replicate/Study_RID=${1}" > "${1}_studyRID.json"
+
+# extract replicate RIDs; abort if the script fails, because the steps below need the CSV it writes
+module load python/3.6.4-anaconda
+python3 ./workflow/scripts/splitStudy.py -s "${1}" || exit 1
+
+# run pipeline on replicate RIDs in parallel (one nextflow run per CSV line, at most 5 at a time)
+module load nextflow/20.01.0
+module load singularity/3.5.3
+xargs -P 5 -I {} nextflow run workflow/rna-seq.nf --repRID {} < "${1}_studyRID.csv"
+
+# cleanup study RID files
+rm "${1}_studyRID.json" "${1}_studyRID.csv"
-- 
GitLab