From 2be5d85a29c62ebe8ce7e81ca69d56732e4f2f0b Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Sat, 18 Jan 2020 15:24:48 -0600 Subject: [PATCH] Initial code --- workflow/rna-seq.nf | 27 ++++++++++++++++++++++++--- workflow/scripts/parseMeta.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 workflow/scripts/parseMeta.py diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index c1d72ef..6e4199d 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -20,6 +20,7 @@ Channel.from(params.repRID) .into { repRID_getBag repRID_getData + repRID_parseMetadata repRID_trimData } @@ -33,7 +34,6 @@ derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json") * getData: get bagit file from consortium */ process getBag { - executor 'local' tag "${repRID_getBag}" publishDir "${logsDir}/getBag", mode: 'symlink', pattern: "${repRID_getBag}.getBag.err" @@ -66,7 +66,6 @@ process getData { input: val repRID_getData - executor 'local' path cookies, stageAs: 'deriva-cookies.txt' from bdbag path bagit @@ -77,7 +76,6 @@ process getData { file("**/Experiment.csv") into experimentMeta file ("${repRID_getData}.getData.err") - script: """ hostname >>${repRID_getData}.getData.err @@ -94,6 +92,29 @@ process getData { """ } +/* + * parseMetadata: parses metadata to extract experiment parameters +*/ +process parseMetadata { + tag "${repRID_parseMetadata}" + publishDir "${logsDir}/parseMetadata", mode: 'symlink', pattern: "${repRID_parseMetadata}.parseMetadata.err" + + input: + val repRID_parseMetadata + file fileMeta + file experimentSettingsMeta + file experimentMeta + + output: + + script: + """ + hostname >>${repRID_parseMetadata}.parseMetadata.err + ulimit -a >>${repRID_parseMetadata}.parseMetadata.err + """ + +} + /* * trimData: trims any adapter or non-host sequences from the data */ diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py new file 
#!/usr/bin/env python3
"""Check replicate metadata for the RNA-seq workflow (workflow/scripts/parseMeta.py).

The script loads a delimited metadata table and, when asked for the
``repRID`` parameter, verifies that:

* the table refers to exactly one replicate RID,
* that RID is the one given on the command line, and
* no more than two rows have ``File_Type == "FastQ"``.

Any violation is reported on stderr and the process exits with a
non-zero status.
"""

import argparse
import sys

import pandas as pd

# Upper bound taken from the original check (``> 2`` FastQ rows is an error).
MAX_FASTQ_FILES = 2
# Columns the repRID checks read from the metadata table.
REQUIRED_COLUMNS = ("Replicate_RID", "File_Type")


def get_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``repRID``, ``metaFile``
        and ``parameter`` (all required, all strings).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--repRID', help="The replicate RID.", required=True)
    parser.add_argument('-m', '--metaFile', help="The metadata file to extract.", required=True)
    parser.add_argument('-p', '--parameter', help="The parameter to extract.", required=True)
    return parser.parse_args(argv)


def _replicate_errors(meta, rep_rid):
    """Return one message per replicate-level problem found in *meta*.

    Args:
        meta: DataFrame holding the metadata table.
        rep_rid: Replicate RID the table is expected to describe.

    Returns:
        A list of human-readable error strings; empty when all checks pass.
    """
    missing = [col for col in REQUIRED_COLUMNS if col not in meta.columns]
    if missing:
        # Without these columns none of the checks below can run.
        return ["Metadata is missing required column(s): " + ", ".join(missing)]

    errors = []

    found = [str(rid) for rid in meta["Replicate_RID"].dropna().unique()]
    if not found:
        errors.append("Metadata contains no replicate RID")
    elif len(found) > 1:
        errors.append(
            "Metadata contains multiple replicate RIDs: " + ", ".join(found)
        )
    elif found[0] != rep_rid:
        errors.append(
            "Replicate RID in metadata ({}) does not match the requested "
            "replicate ({})".format(found[0], rep_rid)
        )

    fastq_count = int((meta["File_Type"] == "FastQ").sum())
    if fastq_count > MAX_FASTQ_FILES:
        errors.append(
            "Metadata lists {} FastQ files; at most {} are allowed".format(
                fastq_count, MAX_FASTQ_FILES
            )
        )

    return errors


def main(argv=None):
    """Run the checks selected by ``--parameter``.

    Args:
        argv: Optional list of argument strings, forwarded to ``get_args``.

    Raises:
        SystemExit: with the collected error messages (exit status 1) when
            a ``repRID`` check fails.
    """
    args = get_args(argv)

    # header=0: the checks address columns by name, so the first row must be
    # read as the header. dtype=str keeps identifiers from being coerced to
    # numbers before they are compared with the command-line value.
    # NOTE(review): the delimiter is kept as committed (tab); the workflow
    # stages *.csv files, so confirm whether sep="," is what is needed.
    meta = pd.read_csv(args.metaFile, sep="\t", header=0, dtype=str)

    # Only "repRID" is implemented; any other parameter performs no checks,
    # as in the original draft.
    if args.parameter == "repRID":
        errors = _replicate_errors(meta, args.repRID)
        if errors:
            # A string argument is printed to stderr and yields exit status 1.
            sys.exit("\n".join(errors))


if __name__ == '__main__':
    main()