From 710a67d9f766777667679e24af93212ea764a928 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Sat, 18 Jan 2020 17:55:28 -0600 Subject: [PATCH] Add checks for meta repRID --- workflow/conf/aws_ondemand.config | 7 ------- workflow/conf/aws_spot.config | 7 ------- workflow/conf/biohpc.config | 6 +++--- workflow/nextflow.config | 15 +++++++++++++++ workflow/rna-seq.nf | 1 + workflow/scripts/parseMeta.py | 17 +++++++++++------ 6 files changed, 30 insertions(+), 23 deletions(-) diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config index 1a14ebf..84fcb27 100755 --- a/workflow/conf/aws_ondemand.config +++ b/workflow/conf/aws_ondemand.config @@ -13,14 +13,7 @@ process { cpus = 1 memory = '1 GB' - withName:getBag { - container = 'bicf/gudmaprbkfilexfer:1.3' - } - withName:getData { - container = 'bicf/gudmaprbkfilexfer:1.3' - } withName:trimData { - container = 'bicf/trimgalore:1.1' cpus = 15 } } \ No newline at end of file diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config index b5239a2..fbccb3c 100755 --- a/workflow/conf/aws_spot.config +++ b/workflow/conf/aws_spot.config @@ -13,14 +13,7 @@ process { cpus = 1 memory = '1 GB' - withName:getBag { - container = 'bicf/gudmaprbkfilexfer:1.3' - } - withName:getData { - container = 'bicf/gudmaprbkfilexfer:1.3' - } withName:trimData { - container = 'bicf/trimgalore:1.1' cpus = 15 } } diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 20da91a..36d5b33 100755 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -5,16 +5,16 @@ process { withName:getBag { executor = 'local' - container = 'docker://bicf/gudmaprbkfilexfer:1.3' } withName:getData { executor = 'local' - container = 'docker://bicf/gudmaprbkfilexfer:1.3' } withName:trimData { - container = 'docker://bicf/trimgalore:1.1' queue = '256GB,256GBv1,384GB' } + withName:parseMetadata { + executor = 'local' + } } singularity { diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 3758499..2635a79 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -10,6 +10,21 @@ profiles { } } +process { + withName:getBag { + container = 'bicf/gudmaprbkfilexfer:1.3' + } + withName:getData { + container = 'bicf/gudmaprbkfilexfer:1.3' + } + withName:trimData { + container = 'bicf/trimgalore:1.1' + } + withName:parseMetadata { + container = 'bicf/python:1.3' + } +} + trace { enabled = true file = 'pipeline_trace.txt' diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 6e4199d..a10a29a 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -111,6 +111,7 @@ process parseMetadata { """ hostname >>${repRID_parseMetadata}.parseMetadata.err ulimit -a >>${repRID_parseMetadata}.parseMetadata.err + python ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m ${fileMeta} -p repRID """ } diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py index 115dd6d..bd872fc 100644 --- a/workflow/scripts/parseMeta.py +++ b/workflow/scripts/parseMeta.py @@ -14,14 +14,19 @@ def get_args(): def main(): args = get_args() - metaFile = pd.read_csv(args.metaFile,sep="\t",header=None) + metaFile = pd.read_csv(args.metaFile,sep=",",header=0) if (args.parameter == "repRID"): if (len(metaFile.Replicate_RID.unique()) > 1): - #ERROR - if not (metaFile.Replicate_RID == arg$repRID): - #ERROR - if (len(fileFile[fileFile["File_Type"] == "FastQ"].RID) > 2): - #ERROR + print("There are multiple replicate RID's in the metadata: " + " ".join(metaFile.Replicate_RID.unique())) + exit(1) + if not (metaFile.Replicate_RID.unique() == args.repRID): + print("Replicate RID in metadata does not match run parameters: " + metaFile.Replicate_RID.unique() + " vs " + args.repRID) + exit(1) + else: + print(metaFile["Replicate_RID"].unique()) + if (len(metaFile[metaFile["File_Type"] == "FastQ"]) > 2): + print("There are more then 2 fastq's in the metadata: " + " ".join(metaFile[metaFile["File_Type"] == "FastQ"].RID)) + exit(1) if __name__ == '__main__': -- GitLab