diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml
index 3c1c5358358f23478b6531a06a8a739f5b8e6685..99a861cfcbcec497a3818aabb67c2e8d83923c19 100644
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -80,9 +80,10 @@ custom_data:
             Ends
             Stranded
             Spike-in
+            Raw Reads
             Assigned Reads
-            Read Length
-            TIN
+            Median Read Length
+            Median TIN
 
         file_format: 'tsv'
         section_name: 'TIN'
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 18711393de284b4de832e612162058181ec782fd..b1cf47e81a40efc33376f3e7041d0a5b981e5ed4 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -902,6 +902,7 @@ process fastqc {
 
   output:
     path ("*_fastqc.zip") into fastqc
+    path ("rawReads.csv") into inferMetadata_rawReads
 
   script:
     """
@@ -911,9 +912,18 @@ process fastqc {
     # run fastqc
     echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log
     fastqc *.fastq.gz -o .
+
+    # count raw reads in the first fastq (4 lines per read); \$ is escaped so bash, not Groovy, expands it
+    zcat ${fastq[0]} | echo \$((`wc -l`/4)) > rawReads.csv
     """
 }
 
+// Extract the raw read count written by fastqc (rawReads.csv) into its own channel for aggrQC
+rawReadsInfer = Channel.create()
+inferMetadata_rawReads.splitCsv(sep: ",", header: false).separate(
+  rawReadsInfer
+)
+
 /*
  *dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates
 */
@@ -998,6 +1008,7 @@ process aggrQC {
     val speciesI from speciesInfer_aggrQC
     val readLengthM from readLengthMeta
     val readLengthI from readLengthInfer
+    val rawReadsI from rawReadsInfer
     val assignedReadsI from assignedReadsInfer
     val tinMedI from tinMedInfer
     val expRID
@@ -1018,10 +1029,10 @@ process aggrQC {
 
     # make metadata table
     echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
-    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tAssigned Reads\tRead Length\tTIN" > metadata.tsv
-    echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-" >> metadata.tsv
-    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t${readLengthM}\t-" >> metadata.tsv
-    echo -e "Measured\t-\t${endsManual}\t-\t-\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
+    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRaw Reads\tAssigned Reads\tMedian Read Length\tMedian TIN" > metadata.tsv
+    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t${readLengthM}\t-" >> metadata.tsv
+    echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+    echo -e "Measured\t-\t${endsManual}\t-\t-\t${rawReadsI}\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
 
     # remove inner distance report if it is empty (SE repRID)
     echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log