From c018e25ca2f3ef2647c1fabd5de741549793840f Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Fri, 8 Jan 2021 16:38:31 -0600 Subject: [PATCH] Remove split study from repository. Moved to independent repository. Close #87. --- README.md | 6 ------ workflow/scripts/split_study.py | 29 ----------------------------- workflow/scripts/split_study.sh | 21 --------------------- 3 files changed, 56 deletions(-) delete mode 100644 workflow/scripts/split_study.py delete mode 100644 workflow/scripts/split_study.sh diff --git a/README.md b/README.md index b21653d..efd7ea1 100644 --- a/README.md +++ b/README.md @@ -80,12 +80,6 @@ To generate you own references or new references: Download the [reference creation script](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/snippets/31). This script will auto create human and mouse references from GENCODE. It can also create ERCC92 spike-in references as well as concatenate them to GENCODE references automatically. In addition, it can create references from manually downloaded FASTA and GTF files. -To run a set of replicates from study RID: ------------------------------------------- -Run in repo root dir: -* `sh workflow/scripts/splitStudy.sh [studyRID]` -It will run in parallel in batches of 5 replicatesRID with 30 second delays between launches.\ -NOTE: Nextflow "local" processes for all replicates will run on the node/machine the bash script is launched from... consider running the study script on the BioHPC's SLURM cluster (use `sbatch`). 
Errors: ------- diff --git a/workflow/scripts/split_study.py b/workflow/scripts/split_study.py deleted file mode 100644 index bf1129e..0000000 --- a/workflow/scripts/split_study.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import pandas as pd -import warnings -warnings.simplefilter(action='ignore', category=FutureWarning) - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument('-s', '--studyRID', - help="The study RID.", required=True) - args = parser.parse_args() - return args - - -def main(): - args = get_args() - studyRID = pd.read_json(args.studyRID+"_studyRID.json") - if studyRID["RID"].count() > 0: - studyRID["RID"].to_csv( - args.studyRID+"_studyRID.csv", header=False, index=False) - else: - raise Exception("No associated replicates found: %s" % - studyRID) - - -if __name__ == '__main__': - main() diff --git a/workflow/scripts/split_study.sh b/workflow/scripts/split_study.sh deleted file mode 100644 index aeec0fa..0000000 --- a/workflow/scripts/split_study.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -#SBATCH -p super -#SBATCH --job-name GUDMAP-RBK_Study -#SBATCH -t 7-0:0:0 - -# query GUDMAP/RBK for study RID -echo "curl --location --request GET 'https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Replicate/Study_RID="${1}"'" | bash > $1_studyRID.json - -# extract replicate RIDs -module load python/3.6.4-anaconda -python3 ./workflow/scripts/split_study.py -s $1 - -# run pipeline on replicate RIDs in parallel -module load nextflow/20.01.0 -module load singularity/3.5.3 -while read repRID; do echo ${repRID}; sleep 30; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow -q run workflow/rna-seq.nf --repRID {} --source production --deriva /project/BICF/BICF_Core/shared/gudmap/test_data/auth/credential.json --bdbag /project/BICF/BICF_Core/shared/gudmap/test_data/auth/cookies.txt --dev false --upload true --email gervaise.henry@utsouthwestern.edu -with-report ./output/{}_report.html -with-timeline 
./output/{}_timeline.html - -# cleanup study RID files -rm $1_studyRID.json -#rm $1_studyRID.csv -- GitLab