Skip to content
Snippets Groups Projects
Commit 788a539f authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Extract assigned reads count

parent 740d2eff
Branches
Tags
2 merge requests!37v0.0.1,!36Metadata output update
Pipeline #7788 failed with stages
in 2 minutes and 34 seconds
...@@ -80,10 +80,12 @@ custom_data: ...@@ -80,10 +80,12 @@ custom_data:
Ends Ends
Stranded Stranded
Spike-in Spike-in
Assigned Reads:
format: '{:,.0f}'
Read Length: Read Length:
format: '{:,.0f}' format: '{:,.0f}'
TIN: TIN:
format: '{:+.1f}' format: '{:+.1f}'
file_format: 'tsv' file_format: 'tsv'
section_name: 'TIN' section_name: 'TIN'
......
...@@ -842,6 +842,7 @@ process countData { ...@@ -842,6 +842,7 @@ process countData {
output: output:
path ("*.countTable.csv") into counts path ("*.countTable.csv") into counts
path ("*.countData.summary") into countsQC path ("*.countData.summary") into countsQC
path ("assignedReads") into inferMetadata_assignedReads
script: script:
""" """
...@@ -861,7 +862,7 @@ process countData { ...@@ -861,7 +862,7 @@ process countData {
elif [ "${stranded}" == "reverse" ] elif [ "${stranded}" == "reverse" ]
then then
stranding=2 stranding=2
echo -e "LOG: strandedness set to forward stranded [2]" >> ${repRID}.countData.log echo -e "LOG: strandedness set to reverse stranded [2]" >> ${repRID}.countData.log
fi fi
# run featureCounts # run featureCounts
...@@ -875,6 +876,11 @@ process countData { ...@@ -875,6 +876,11 @@ process countData {
fi fi
echo -e "LOG: counted" >> ${repRID}.countData.log echo -e "LOG: counted" >> ${repRID}.countData.log
assignedReads=grep -m 1 'Assigned' *.countData.summary | grep -oe '\([0-9.]*\)'
echo -e \${assignedReads} > assignedReads.csv
echo -e "LOG: assigned reads: "\${assignedReads} >> ${repRID}.countData.log
# calculate TPM from the resulting countData table # calculate TPM from the resulting countData table
echo -e "LOG: calculating TPM with R" >> ${repRID}.countData.log echo -e "LOG: calculating TPM with R" >> ${repRID}.countData.log
Rscript calculateTPM.R --count "${repRID}.countData" Rscript calculateTPM.R --count "${repRID}.countData"
...@@ -905,6 +911,12 @@ process fastqc { ...@@ -905,6 +911,12 @@ process fastqc {
""" """
} }
// Extract number of assigned reads metadata into channel
// (the target of separate() must be the channel created here, which aggrQC reads as assignedReadsInfer)
assignedReadsInfer = Channel.create()
inferMetadata_assignedReads.splitCsv(sep: ",", header: false).separate(
assignedReadsInfer
)
/* /*
*dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates *dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates
*/ */
...@@ -920,7 +932,7 @@ process dataQC { ...@@ -920,7 +932,7 @@ process dataQC {
output: output:
path "${repRID}.tin.hist.tsv" into tinHist path "${repRID}.tin.hist.tsv" into tinHist
path "${repRID}.tin.med.csv" into tinMed path "${repRID}.tin.med.csv" into inferMetadata_tinMed
path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance
script: script:
...@@ -955,7 +967,7 @@ process dataQC { ...@@ -955,7 +967,7 @@ process dataQC {
// Extract median TIN metadata into channel // Extract median TIN metadata into channel
tinMedInfer = Channel.create() tinMedInfer = Channel.create()
tinMed.splitCsv(sep: ",", header: false).separate( inferMetadata_tinMed.splitCsv(sep: ",", header: false).separate(
tinMedInfer tinMedInfer
) )
...@@ -989,6 +1001,7 @@ process aggrQC { ...@@ -989,6 +1001,7 @@ process aggrQC {
val speciesI from speciesInfer_aggrQC val speciesI from speciesInfer_aggrQC
val readLengthM from readLengthMeta val readLengthM from readLengthMeta
val readLengthI from readLengthInfer val readLengthI from readLengthInfer
val assignedReadsI from assignedReadsInfer
val tinMedI from tinMedInfer val tinMedI from tinMedInfer
val expRID val expRID
val studyRID val studyRID
...@@ -1008,11 +1021,10 @@ process aggrQC { ...@@ -1008,11 +1021,10 @@ process aggrQC {
# make metadata table # make metadata table
echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRead Length\tTIN" > metadata.tsv echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tAssigned Reads\tRead Length\tTIN" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-" >> metadata.tsv echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t${readLengthM}\t-" >> metadata.tsv echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t${readLengthM}\t-\t-" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-\t-\t-" >> metadata.tsv echo -e "Measured\t-\t${endsManual}\t-\t-\t${assignedReadsI}\t${readLengthI}\t${tinMedI}" >> metadata.tsv
echo -e "Measured\t-\t-\t-\t-\t${readLengthI}\t${tinMedI}" >> metadata.tsv
# remove inner distance report if it is empty (SE repRID) # remove inner distance report if it is empty (SE repRID)
echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment