From c018e25ca2f3ef2647c1fabd5de741549793840f Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Fri, 8 Jan 2021 16:38:31 -0600
Subject: [PATCH] Remove split study from repository. Moved to independent
 repository. Close #87.

---
 README.md                       |  6 ------
 workflow/scripts/split_study.py | 29 -----------------------------
 workflow/scripts/split_study.sh | 21 ---------------------
 3 files changed, 56 deletions(-)
 delete mode 100644 workflow/scripts/split_study.py
 delete mode 100644 workflow/scripts/split_study.sh

diff --git a/README.md b/README.md
index b21653d..efd7ea1 100644
--- a/README.md
+++ b/README.md
@@ -80,12 +80,6 @@ To generate you own references or new references:
 Download the [reference creation script](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/snippets/31).
 This script will auto create human and mouse references from GENCODE. It can also create ERCC92 spike-in references as well as concatenate them to GENCODE references automatically. In addition, it can create references from manually downloaded FASTA and GTF files.
 
-To run a set of replicates from study RID:
-------------------------------------------
-Run in repo root dir:
-* `sh workflow/scripts/splitStudy.sh [studyRID]`
-It will run in parallel in batches of 5 replicatesRID with 30 second delays between launches.\
-NOTE: Nextflow "local" processes for all replicates will run on the node/machine the bash script is launched from... consider running the study script on the BioHPC's SLURM cluster (use `sbatch`).
 
 Errors:
 -------
diff --git a/workflow/scripts/split_study.py b/workflow/scripts/split_study.py
deleted file mode 100644
index bf1129e..0000000
--- a/workflow/scripts/split_study.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import pandas as pd
-import warnings
-warnings.simplefilter(action='ignore', category=FutureWarning)
-
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-s', '--studyRID',
-                        help="The study RID.", required=True)
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = get_args()
-    studyRID = pd.read_json(args.studyRID+"_studyRID.json")
-    if studyRID["RID"].count() > 0:
-        studyRID["RID"].to_csv(
-            args.studyRID+"_studyRID.csv", header=False, index=False)
-    else:
-        raise Exception("No associated replicates found: %s" %
-                        studyRID)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/workflow/scripts/split_study.sh b/workflow/scripts/split_study.sh
deleted file mode 100644
index aeec0fa..0000000
--- a/workflow/scripts/split_study.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-#SBATCH -p super
-#SBATCH --job-name GUDMAP-RBK_Study
-#SBATCH -t 7-0:0:0
-
-# query GUDMAP/RBK for study RID
-echo "curl --location --request GET 'https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Replicate/Study_RID="${1}"'" | bash > $1_studyRID.json
-
-# extract replicate RIDs
-module load python/3.6.4-anaconda
-python3 ./workflow/scripts/split_study.py -s $1
-
-# run pipeline on replicate RIDs in parallel
-module load nextflow/20.01.0
-module load singularity/3.5.3
-while read repRID; do echo ${repRID}; sleep 30; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow -q run workflow/rna-seq.nf --repRID {} --source production --deriva /project/BICF/BICF_Core/shared/gudmap/test_data/auth/credential.json --bdbag /project/BICF/BICF_Core/shared/gudmap/test_data/auth/cookies.txt --dev false --upload true --email gervaise.henry@utsouthwestern.edu -with-report ./output/{}_report.html -with-timeline ./output/{}_timeline.html
-
-# cleanup study RID files
-rm $1_studyRID.json
-#rm $1_studyRID.csv
-- 
GitLab