Add experiment and study RIDs to the parsed metadata and the aggregated QC report.

* workflow/rna-seq.nf: parseMetadata calls parseMeta.py with `-p expRID` and
  `-p studyRID` (both against "${fileMeta}"), logs the results and appends
  them to design.csv. The two extra CSV columns are separated into the new
  expRID and studyRID channels, which aggrQC writes to rid.tsv in place of
  the former "-" placeholders.
* workflow/scripts/parseMeta.py: adds the expRID and studyRID parameters.
  Each prints the single unique RID, or prints an error and exits with
  status 1 when more than one unique value is present. The
  `endsManual = ""` initialisation in main() is removed.

NOTE(review): the indentation and blank context lines below were reconstructed
by convention; verify them against the target files before applying. The
`index` blob hashes are carried over unchanged and may no longer match the
post-image of parseMeta.py.

diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 2d311a42fa17b47f2384ab1eaa3da99acf5b0a5c..52e5d618efc160ea3fb607876bafd382e6c4b6b7 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -179,7 +179,15 @@ process parseMetadata {
     # Check replicate RID metadata
     rep=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p repRID)
     echo "LOG: replicate RID metadata parsed: \${rep}" >> ${repRID}.parseMetadata.err
+
+    # Get experiment RID metadata
+    exp=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p expRID)
+    echo "LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.err
 
+    # Get study RID metadata
+    study=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p studyRID)
+    echo "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.err
+
     # Get endedness metadata
     endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p endsMeta)
     echo "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.err
@@ -201,7 +209,7 @@ process parseMetadata {
     echo "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.err
 
     # Save design file
-    echo "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species}" > design.csv
+    echo "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${exp},\${study}" > design.csv
     """
 }
 
@@ -211,12 +219,16 @@ endsManual = Channel.create()
 strandedMeta = Channel.create()
 spikeMeta = Channel.create()
 speciesMeta = Channel.create()
+expRID = Channel.create()
+studyRID = Channel.create()
 metadata.splitCsv(sep: ",", header: false).separate(
   endsMeta,
   endsManual,
   strandedMeta,
   spikeMeta,
-  speciesMeta
+  speciesMeta,
+  expRID,
+  studyRID
 )
 // Replicate metadata for multiple process inputs
 endsManual.into {
@@ -914,6 +926,8 @@ process aggrQC {
     val strandedI from strandedInfer_aggrQC
     val spikeI from spikeInfer_aggrQC
     val speciesI from speciesInfer_aggrQC
+    val expRID
+    val studyRID
 
   output:
     path "${repRID}.aggrQC.{out,err}" optional true
@@ -924,7 +938,7 @@ process aggrQC {
     ulimit -a >> ${repRID}.aggrQC.err
 
     echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv
-    echo -e "${repRID}\t-\t-" >> rid.tsv
+    echo -e "${repRID}\t${expRID}\t${studyRID}" >> rid.tsv
 
     echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in" > metadata.tsv
     echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}" >> metadata.tsv
diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py
index 78b0e2f4e9fb51f9fc827321f303ccc0d3bdcad1..fd73cf34c7084110dfeae52b74e63e2eb96af9a0 100644
--- a/workflow/scripts/parseMeta.py
+++ b/workflow/scripts/parseMeta.py
@@ -17,7 +17,6 @@ def get_args():
 def main():
     args = get_args()
     metaFile = pd.read_csv(args.metaFile,sep=",",header=0)
-    endsManual = ""
 
     # Check replicate RID metadata from 'File.csv'
     if (args.parameter == "repRID"):
@@ -33,6 +32,24 @@ def main():
         if (len(metaFile[metaFile["File_Type"] == "FastQ"]) > 2):
             print("There are more then 2 fastq's in the metadata: " + " ".join(metaFile[metaFile["File_Type"] == "FastQ"].RID))
             exit(1)
+
+    # Check experiment RID metadata from 'File.csv'
+    if (args.parameter == "expRID"):
+        if (len(metaFile.Experiment_RID.unique()) > 1):
+            print("There are multiple experiment RID's in the metadata: " + " ".join(metaFile.Experiment_RID.unique()))
+            exit(1)
+        else:
+            exp=metaFile["Experiment_RID"].unique()[0]
+            print(exp)
+
+    # Check study RID metadata from 'File.csv'
+    if (args.parameter == "studyRID"):
+        if (len(metaFile.Study_RID.unique()) > 1):
+            print("There are multiple study RID's in the metadata: " + " ".join(metaFile.Study_RID.unique()))
+            exit(1)
+        else:
+            study=metaFile["Study_RID"].unique()[0]
+            print(study)
 
     # Get endedness metadata from 'Experiment Settings.csv'
     if (args.parameter == "endsMeta"):