Skip to content
Snippets Groups Projects
Commit fa11871a authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Extract raw reads

parent 4faeddc4
Branches
Tags
2 merge requests: !37 v0.0.1, !36 Metadata output update
Pipeline #7811 failed with stages
in 2 minutes and 19 seconds
......@@ -80,9 +80,10 @@ custom_data:
Ends
Stranded
Spike-in
Raw Reads
Assigned Reads
Read Length
TIN
Median Read Length
Median TIN
file_format: 'tsv'
section_name: 'TIN'
......
......@@ -902,6 +902,7 @@ process fastqc {
output:
path ("*_fastqc.zip") into fastqc
path ("rawRead.csv") into inferMetadata_rawReads
script:
"""
......@@ -911,9 +912,18 @@ process fastqc {
# run fastqc
echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log
fastqc *.fastq.gz -o .
# count raw reads: total decompressed line count of the first fastq divided by 4 (four lines per FASTQ record)
# the dollar signs of the shell substitutions are backslash-escaped so Groovy leaves them for bash to evaluate
# the result is written to rawRead.csv, the file name declared in the output block of this process
echo \$(( \$(zcat ${fastq[0]} | wc -l) / 4 )) > rawRead.csv
"""
}
// Extract number of raw reads metadata into channel
// rawReadsInfer is created empty here; aggrQC later reads it as `val rawReadsI`
rawReadsInfer = Channel.create()
// Parse what fastqc emits on inferMetadata_rawReads as comma-separated text with no header row,
// then pass the parsed rows to separate() with rawReadsInfer as its only target channel
inferMetadata_rawReads.splitCsv(sep: ",", header: false).separate(
rawReadsInfer
)
/*
*dataQC: calculate transcript integrity numbers (TIN) and bin them, as well as calculate the inner distance of PE replicates
*/
......@@ -998,6 +1008,7 @@ process aggrQC {
val speciesI from speciesInfer_aggrQC
val readLengthM from readLengthMeta
val readLengthI from readLengthInfer
val rawReadsI from rawReadsInfer
val assignedReadsI from assignedReadsInfer
val tinMedI from tinMedInfer
val expRID
......@@ -1018,10 +1029,10 @@ process aggrQC {
# make metadata table
echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tAssigned Reads\tRead Length\tTIN" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t${readLengthM}\t-" >> metadata.tsv
echo -e "Measured\t-\t${endsManual}\t-\t-\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRaw Reads\tAssigned Reads\tMedian Read Length\tMedian TIN" > metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t${readLengthM}\t-" >> metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
echo -e "Measured\t-\t${endsManual}\t-\t-\t${rawReadsI}\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
# remove inner distance report if it is empty (SE repRID)
echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment