From 158a687eea1e96614365e392a9d729808aa6cb01 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Sat, 8 Aug 2020 21:15:34 -0500
Subject: [PATCH] Add scripts to run from study RID

---
Review notes (text between the "---" line and the first "diff --git" is
commentary and is not applied):
 - splitStudy.py: an empty query result has no "RID" column, so the column is
   now tested for before it is indexed; the error message reports the study
   RID instead of formatting the whole data frame.
 - splitStudy.sh: curl is called directly instead of echoing a command line
   into bash; every "$1" expansion is quoted; the script stops when no
   argument is given or when the replicate list cannot be produced.
 - The "index" blob ids of the two new files were not regenerated.
 - The blank context line in the .gitignore hunk is assumed to sit between
   "run*.sh" and "!.gitkeep"; confirm against the target file.

 .gitignore                     |  2 ++
 cleanup.sh                     |  2 ++
 workflow/scripts/splitStudy.py | 38 ++++++++++++++++++++++++++++++++++++++
 workflow/scripts/splitStudy.sh | 31 +++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+)
 create mode 100644 workflow/scripts/splitStudy.py
 create mode 100644 workflow/scripts/splitStudy.sh

diff --git a/.gitignore b/.gitignore
index f500ef7..1228878 100644
--- a/.gitignore
+++ b/.gitignore
@@ -297,6 +297,8 @@ timeline*.html*
 *.tmp
 *.swp
 *.out
+*_studyRID.json
+*_studyRID.csv
 run*.sh
 
 !.gitkeep
diff --git a/cleanup.sh b/cleanup.sh
index 9569ff5..aa28920 100644
--- a/cleanup.sh
+++ b/cleanup.sh
@@ -5,3 +5,5 @@ rm timeline*.html*
 rm .nextflow*.log*
 rm -r .nextflow/
 rm -r work/
+rm *_studyRID.json
+rm *_studyRID.csv
diff --git a/workflow/scripts/splitStudy.py b/workflow/scripts/splitStudy.py
new file mode 100644
index 0000000..82ffc28
--- /dev/null
+++ b/workflow/scripts/splitStudy.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""Write the replicate RIDs of a study query result to a one-column CSV."""
+
+import argparse
+import pandas as pd
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+
+def get_args():
+    """Parse the command line; -s/--studyRID is required."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-s', '--studyRID', help="The study RID.", required=True)
+    return parser.parse_args()
+
+
+def main():
+    """Convert <studyRID>_studyRID.json into <studyRID>_studyRID.csv.
+
+    Reads the JSON file named after the study RID and writes its "RID"
+    column, one value per line with no header or index, to the CSV file.
+
+    Raises:
+        Exception: if the JSON contains no replicate RIDs.
+    """
+    args = get_args()
+    replicates = pd.read_json(args.studyRID + "_studyRID.json")
+    # An empty result ("[]") parses to a frame without an "RID" column, so
+    # test for the column first; indexing it directly would raise KeyError
+    # and the intended message below would never be shown.
+    if "RID" not in replicates.columns or replicates["RID"].count() == 0:
+        raise Exception("No associated replicates found: %s" % args.studyRID)
+    replicates["RID"].to_csv(
+        args.studyRID + "_studyRID.csv", header=False, index=False)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/workflow/scripts/splitStudy.sh b/workflow/scripts/splitStudy.sh
new file mode 100644
index 0000000..a64b6d9
--- /dev/null
+++ b/workflow/scripts/splitStudy.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Run the RNA-seq pipeline on every replicate of a GUDMAP/RBK study.
+# Usage: splitStudy.sh <study RID>
+
+if [ -z "${1}" ]; then
+    echo "Usage: ${0} <study RID>" >&2
+    exit 1
+fi
+
+# query GUDMAP/RBK for study RID
+# (curl is run directly; echoing a command line into bash would execute any
+# shell metacharacters contained in the argument)
+curl --location --request GET "https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Replicate/Study_RID=${1}" > "${1}_studyRID.json"
+
+# extract replicate RIDs
+module load python/3.6.4-anaconda
+if ! python3 ./workflow/scripts/splitStudy.py -s "${1}"; then
+    # no replicate list was produced: remove the query result and stop
+    rm -f "${1}_studyRID.json"
+    exit 1
+fi
+
+# run pipeline on replicate RIDs in parallel
+module load nextflow/20.01.0
+module load singularity/3.5.3
+xargs -P 5 -I {} nextflow run workflow/rna-seq.nf --repRID {} < "${1}_studyRID.csv"
+
+# cleanup study RID files
+rm "${1}_studyRID.json"
+rm "${1}_studyRID.csv"
-- 
GitLab