From fa11871a5efbc5211f8366743369876763241242 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Wed, 5 Aug 2020 20:09:31 -0500
Subject: [PATCH] Extract raw reads

Count the reads in the first raw fastq inside the fastqc process and
write the count to rawReads.csv. The count is routed through a new
rawReadsInfer channel into aggrQC, where it is reported in a new
"Raw Reads" column of the metadata table. The read length and TIN
columns are renamed to "Median Read Length" and "Median TIN", and the
MultiQC custom_data headers are updated to match.
---
Review notes (this section is ignored by git am):
 * The fastqc output declaration now names rawReads.csv, the file the
   script actually writes (it previously declared rawRead.csv).
 * The "$" of the shell arithmetic expansion is escaped as "\$" so that
   Nextflow passes it through to Bash instead of trying to interpolate it.
 * Context-line indentation was reconstructed from a whitespace-collapsed
   copy of this patch; verify it against the target tree or apply with
   "git apply --ignore-whitespace".

 workflow/conf/multiqc_config.yaml |  5 +++--
 workflow/rna-seq.nf               | 19 +++++++++++++++----
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml
index 3c1c535..99a861c 100644
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -80,9 +80,10 @@ custom_data:
             Ends
             Stranded
             Spike-in
+            Raw Reads
             Assigned Reads
-            Read Length
-            TIN
+            Median Read Length
+            Median TIN
 
         file_format: 'tsv'
         section_name: 'TIN'
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 1871139..b1cf47e 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -902,6 +902,7 @@ process fastqc {
 
   output:
     path ("*_fastqc.zip") into fastqc
+    path ("rawReads.csv") into inferMetadata_rawReads
 
   script:
     """
@@ -911,9 +912,18 @@ process fastqc {
     # run fastqc
     echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log
     fastqc *.fastq.gz -o .
+
+    # count raw reads
+    zcat ${fastq[0]} | echo \$((`wc -l`/4)) > rawReads.csv
     """
 }
 
+// Extract number of raw reads metadata into channel
+rawReadsInfer = Channel.create()
+inferMetadata_rawReads.splitCsv(sep: ",", header: false).separate(
+  rawReadsInfer
+)
+
 /*
  *dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates
  */
@@ -998,6 +1008,7 @@ process aggrQC {
     val speciesI from speciesInfer_aggrQC
     val readLengthM from readLengthMeta
     val readLengthI from readLengthInfer
+    val rawReadsI from rawReadsInfer
     val assignedReadsI from assignedReadsInfer
     val tinMedI from tinMedInfer
     val expRID
@@ -1018,10 +1029,10 @@ process aggrQC {
 
     # make metadata table
     echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
-    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tAssigned Reads\tRead Length\tTIN" > metadata.tsv
-    echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-" >> metadata.tsv
-    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t${readLengthM}\t-" >> metadata.tsv
-    echo -e "Measured\t-\t${endsManual}\t-\t-\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
+    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRaw Reads\tAssigned Reads\tMedian Read Length\tMedian TIN" > metadata.tsv
+    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t${readLengthM}\t-" >> metadata.tsv
+    echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+    echo -e "Measured\t-\t${endsManual}\t-\t-\t${rawReadsI}\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
 
     # remove inner distance report if it is empty (SE repRID)
     echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
-- 
GitLab