diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index b279e51b6e217de96cea6ac80e25bfef95e5f00d..5a8b614af62d339d32fa6b0bd92f8f00df77d673 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow // Define input variables -params.deriva = "${baseDir}/../test_data/credential.json" -params.bdbag = "${baseDir}/../test_data/cookies.txt" +params.deriva = "${baseDir}/../test_data/auth/credential.json" +params.bdbag = "${baseDir}/../test_data/auth/cookies.txt" //params.repRID = "16-1ZX4" params.repRID = "Q-Y5JA" @@ -115,12 +115,7 @@ process parseMetadata { path experimentMeta output: - val endsMeta - val endsManual - val ends - val stranded - val spike - val specie + path 'design.csv' into metadata script: """ @@ -132,21 +127,12 @@ process parseMetadata { echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID}.parseMetadata.err # Get endedness metadata - endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p ends) + endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p endsMeta) echo "LOG: endedness metadata parsed: \${endsMeta}" >>${repRID}.parseMetadata.err # Manually get endness endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p endsManual) echo "LOG: endedness manually detected: \${endsManual}" >>${repRID}.parseMetadata.err - - if [ '\${endsMeta}' == 'uk' ] - then - ends=\${endsManual} - echo "LOG: manual detected endness used: \${ends}" >>${repRID}.parseMetadata.err - else - ends=\${endsMeta} - echo "LOG: metadata endness used: \${ends}" >>${repRID}.parseMetadata.err - fi # Get strandedness metadata stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p stranded) @@ -157,12 +143,18 @@ process parseMetadata { echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID}.parseMetadata.err # Get species metadata - specie=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p specie) - echo "LOG: species metadata parsed: \${specie}" >>${repRID}.parseMetadata.err + species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p species) + echo "LOG: species metadata parsed: \${species}" >>${repRID}.parseMetadata.err + + # Save design file + echo "\${rep},\${endsMeta},\${endsManual},\${stranded},\${spike},\${species}" > design.csv """ } -ends_trimData = ends +metadata.splitCsv(sep: ',', header: false).into { + metadata_trimData + metadata_qc +} /* * trimData: trims any adapter or non-host sequences from the data @@ -173,7 +165,7 @@ process trimData { input: file(fastq) from fastqs - val ends_trimData + tuple val(rep), val(endsMeta), val(endsManual), val(stranded), val(spike), val(species) from metadata_trimData output: path ("*.fq.gz") into fastqs_trimmed @@ -186,7 +178,7 @@ process trimData { ulimit -a >>${repRID}.trimData.err # trim fastqs - if [ '${ends_trimData}' == 'se' ] + if [ '${endsManual}' == 'se' ] then trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err; else diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py index f5aacc39558f77c4150a7c687b542ab249c36ba4..43ca2392078171ec9a1f42f7f9a83d13d0f0383b 100644 --- a/workflow/scripts/parseMeta.py +++ b/workflow/scripts/parseMeta.py @@ -17,6 +17,8 @@ def get_args(): def main(): args = get_args() metaFile = pd.read_csv(args.metaFile,sep=",",header=0) + + # Check replicate RID metadata from 'File.csv' if (args.parameter == "repRID"): if (len(metaFile.Replicate_RID.unique()) > 1): print("There are multiple replicate RID's in the metadata: " + " ".join(metaFile.Replicate_RID.unique())) @@ -30,20 +32,26 @@ def main(): if (len(metaFile[metaFile["File_Type"] == "FastQ"]) > 2): print("There are more then 2 fastq's in the metadata: " + " ".join(metaFile[metaFile["File_Type"] == "FastQ"].RID)) exit(1) - if (args.parameter == "ends"): + + # Get endedness metadata from 'Experiment Settings.csv' + if (args.parameter == "endsMeta"): if (metaFile.Paired_End.unique() == "Single End"): - ends = "se" + endsMeta = "se" elif (metaFile.Paired_End.unique() == "Paired End"): - ends = "pe" + endsMeta = "pe" else: - ends = "uk" - print(ends) + endsMeta = "uk" + print(endsMeta) + + # Manually get endness count from 'File.csv' if (args.parameter == "endsManual"): if (len(metaFile[metaFile["File_Type"] == "FastQ"]) == 1): endsManual = "se" elif (len(metaFile[metaFile["File_Type"] == "FastQ"]) == 2): endsManual = "pe" print(endsManual) + + # Get strandedness metadata from 'Experiment Settings.csv' if (args.parameter == "stranded"): if (metaFile.Has_Strand_Specific_Information.unique() == "yes"): stranded = "stranded" @@ -53,6 +61,8 @@ def main(): print("Stranded metadata not match expected options: " + metaFile.Has_Strand_Specific_Information.unique()) exit(1) print(stranded) + + # Get spike-in metadata from 'Experiment Settings.csv' if (args.parameter == "spike"): if (metaFile.Used_Spike_Ins.unique() == "yes"): spike = "yes" @@ -62,15 +72,17 @@ def main(): print("Spike-ins metadata not match expected options: " + metaFile.Used_Spike_Ins.unique()) exit(1) print(spike) - if (args.parameter == "specie"): + + # Get species metadata from 'Experiment.csv' + if (args.parameter == "species"): if (metaFile.Species.unique() == "Mus musculus"): - specie = "Mus musculus" + species = "Mus musculus" elif (metaFile.Species.unique() == "Homo sapiens"): - specie = "Homo sapiens" + species = "Homo sapiens" else: print("Species metadata not match expected options: " + metaFile.Species.unique()) exit(1) - print(specie) + print(species) if __name__ == '__main__': main() \ No newline at end of file