# Patch summary (descriptive preamble; everything from the first "diff --git" header onward is unchanged).
#
# Files touched:
#   CHANGELOG.md
#     - Adds three entries: fastqc moved before trim (#107), fastq naming-convention
#       filter (#107), and an error output when there are no fastqs.
#   workflow/rna-seq.nf
#     - In the else-branch of the `fastqsForce` check, `fastqs` is now passed through
#       `.collect()` before being split with `.into` into fastqs_parseMetadata and
#       fastqs_fastqc.
#     - The `parseMetadata` and `fastqc` processes now read their fastq input via
#       `.collect()` on their respective channels.
#     - `parseMetadata` gains an `elif` branch, placed after the "Too many fastqs
#       detected (>2)" branch, that sets fastqCountError=true with a
#       "No valid fastqs detected" detail message when fastqCount equals 0.
#     - `fastqCountError_fastqc` and `fastqReadError_fastqc` are moved to the top of
#       their `.into` lists (one row added, one row removed in each list).
#     - The `fastqc` output glob routed to fastqs_trimData is narrowed from
#       "*.fastq.gz" to "*.R{1,2}.fastq.gz".
#   workflow/scripts/bdbag_fetch.sh
#     - The `find` name pattern changes from "*R*.fastq.gz" to "*.R[1-2].fastq.gz".
#
# NOTE(review): the unchanged context row in bdbag_fetch.sh still greps with
#   "R[1,2].fastq.gz"; that bracket expression also matches a literal comma and the
#   dots are unescaped. With the narrower `find` pattern it presumably only ever sees
#   R1/R2 names - confirm, and consider aligning the two patterns in a follow-up.
# NOTE(review): the new CHANGELOG entry documents the convention as "*.R[1,2].fastq.gz"
#   while the code uses "R{1,2}" and "R[1-2]" - confirm which spelling is intended.
# NOTE(review): the loop in bdbag_fetch.sh iterates over unquoted `$(find ...)` output;
#   presumably fastq paths never contain whitespace - verify.
# NOTE(review): this copy appears to have had its line breaks and indentation collapsed;
#   presumably each header, hunk marker and +/-/context row must be restored to its own
#   line before the patch can be applied - confirm against the original commit.
diff --git a/CHANGELOG.md b/CHANGELOG.md index 79ba543b66fe443e6b652e1ffd3cc17b3b09797d..58b5bff22cbb6d968ba5ef214f782aa807f7b055 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ * Add memory limit per thread for samtools sort (#108) * Remove parsing restrictions for submitted stranded/spike/species (#105, #106) * Pass unidentified ends instead of overwriting it as unknown +* Move fastqc process before trim to catch fastq errors (#107) +* Only use fastq's that match *.R[1,2].fastq.gz naming convention (#107) +* Add error output for no fastq's *Known Bugs* * Override params (inputBag, fastq, species) aren't checked for integrity diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 43d3c04e40557e988c98419dfc8b74793a59154d..4b0056be03e3c840cd5919a281316074bf9b7470 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -287,7 +287,7 @@ if (fastqsForce != "") { fastqs_fastqc } } else { - fastqs.into { + fastqs.collect().into { fastqs_parseMetadata fastqs_fastqc } @@ -304,7 +304,7 @@ process parseMetadata { path file from fileMeta path experimentSettings, stageAs: "ExperimentSettings.csv" from experimentSettingsMeta path experiment from experimentMeta - path (fastq) from fastqs_parseMetadata + path (fastq) from fastqs_parseMetadata.collect() val fastqCount output: @@ -376,6 +376,10 @@ process parseMetadata { then fastqCountError=true fastqCountError_details="**Too many fastqs detected (>2)**" + elif [ "${fastqCount}" -eq "0" ] + then + fastqCountError=true + fastqCountError_details="**No valid fastqs detected (may not match .R{1,2}.fastq.gz convention)**" elif [ "\${endsMeta}" == "se" ] && [ "${fastqCount}" -ne "1" ] then fastqCountError=true @@ -486,6 +490,7 @@ fastqError_fl.splitCsv(sep: ",", header: false).separate( // Replicate errors for multiple process inputs fastqCountError.into { + fastqCountError_fastqc fastqCountError_trimData fastqCountError_getRefInfer fastqCountError_downsampleData @@ -498,7 +503,6 @@ fastqCountError.into { 
fastqCountError_dedupData fastqCountError_makeBigWig fastqCountError_countData - fastqCountError_fastqc fastqCountError_dataQC fastqCountError_aggrQC fastqCountError_uploadQC @@ -507,6 +511,7 @@ fastqCountError.into { fastqCountError_failPreExecutionRun_fastq } fastqReadError.into { + fastqReadError_fastqc fastqReadError_trimData fastqReadError_getRefInfer fastqReadError_downsampleData @@ -519,7 +524,6 @@ fastqReadError.into { fastqReadError_dedupData fastqReadError_makeBigWig fastqReadError_countData - fastqReadError_fastqc fastqReadError_dataQC fastqReadError_aggrQC fastqReadError_uploadQC @@ -535,12 +539,12 @@ process fastqc { tag "${repRID}" input: - path (fastq) from fastqs_fastqc + path (fastq) from fastqs_fastqc.collect() val fastqCountError_fastqc val fastqReadError_fastqc output: - path ("*.fastq.gz", includeInputs:true) into fastqs_trimData + path ("*.R{1,2}.fastq.gz", includeInputs:true) into fastqs_trimData path ("*_fastqc.zip") into fastqc path ("rawReads.csv") into rawReadsInfer_fl diff --git a/workflow/scripts/bdbag_fetch.sh b/workflow/scripts/bdbag_fetch.sh index c34dc756d0cc5a47382fb9f96267e378c19ae79a..ea2efd055d201aad1615399db35fad2ac87e7d7a 100644 --- a/workflow/scripts/bdbag_fetch.sh +++ b/workflow/scripts/bdbag_fetch.sh @@ -18,7 +18,7 @@ if [ "${validate}" != "is valid" ] then exit 1 fi -for i in $(find */ -name "*R*.fastq.gz") +for i in $(find */ -name "*.R[1-2].fastq.gz") do path=${2}.$(echo ${i##*/} | grep -o "R[1,2].fastq.gz") cp ${i} ./${path}