Add in initial scripts.

To run a set of replicates from study RID:
Run in repo root dir:
* `sh scripts/ [studyRID]`
Replicates run in parallel, at most 5 replicate RIDs at a time, with a 30-second delay between launches.\
NOTE: Nextflow "local" processes for all replicates will run on the node/machine the bash script is launched from... consider running the study script on the BioHPC's SLURM cluster (use `sbatch`).
#!/usr/bin/env python3
import argparse
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
def get_args():
    """Parse the command-line arguments of this script.

    Returns:
        argparse.Namespace: Parsed arguments; ``studyRID`` holds the value
        given with ``-s`` / ``--studyRID``.

    argparse exits the process with a usage message when the required
    ``--studyRID`` option is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--studyRID',
                        help="The study RID.", required=True)
    args = parser.parse_args()
    return args
def main():
    """Write the replicate RIDs of a study to a one-column CSV file.

    Reads ``<studyRID>_studyRID.json`` from the current directory and writes
    its ``RID`` column to ``<studyRID>_studyRID.csv`` without a header row
    and without the index, i.e. one RID per line.

    Raises:
        Exception: If the JSON contains no ``RID`` column or no non-null
            RID values.
    """
    args = get_args()
    studyRID = pd.read_json(args.studyRID + "_studyRID.json")

    # A result without any records has no "RID" column at all; report that
    # with the same message as an empty column rather than a bare KeyError.
    if "RID" not in studyRID.columns or studyRID["RID"].count() == 0:
        raise Exception("No associated replicates found: %s" %
                        args.studyRID)

    studyRID["RID"].to_csv(
        args.studyRID + "_studyRID.csv", header=False, index=False)


if __name__ == '__main__':
    main()
#SBATCH -p super
#SBATCH --job-name GUDMAP-RBK_Study
#SBATCH -t 7-0:0:0
# Launch the pipeline for every replicate RID listed for one study.
# $1 is the study RID; it is used both in the query and as the prefix of the
# intermediate files $1_studyRID.json and $1_studyRID.csv.
# The #SBATCH lines above only take effect when submitted through sbatch.

# query GUDMAP/RBK for study RID
# The curl command is assembled as a string and executed by a child bash;
# its stdout is saved as $1_studyRID.json.
# NOTE(review): as written the request target is just $1 -- the URL appears
# to be missing; confirm before running.
echo "curl --location --request GET '"${1}"'" | bash > $1_studyRID.json
# extract replicate RIDs
# NOTE(review): the path below ends at the scripts/ directory with no script
# filename -- confirm which script is meant. It is presumably the step that
# produces $1_studyRID.csv, which the loop below reads.
module load python/3.6.4-anaconda
python3 ./workflow/scripts/ -s $1
# run pipeline on replicate RIDs in parallel
module load nextflow/20.01.0
module load singularity/3.5.3
# The while loop emits one replicate RID from the CSV every 30 seconds;
# xargs substitutes each RID for {} and keeps at most 5 nextflow runs going
# at once (-P 5).
# NOTE(review): no workflow file follows "workflow/" and no address follows
# "--email", so "-with-report" is the next token after it -- confirm both.
while read repRID; do echo ${repRID}; sleep 30; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow -q run workflow/ --repRID {} --source production --deriva /project/BICF/BICF_Core/shared/gudmap/test_data/auth/credential.json --bdbag /project/BICF/BICF_Core/shared/gudmap/test_data/auth/cookies.txt --dev false --upload true --email -with-report ./output/{}_report.html -with-timeline ./output/{}_timeline.html
# cleanup study RID files
# Only the JSON is removed; removal of the CSV is commented out, so the
# list of replicate RIDs stays on disk after the run.
rm $1_studyRID.json
#rm $1_studyRID.csv
