Skip to content
Snippets Groups Projects
Commit 3bc88431 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add metadata and repRID to multiqc report

parent fcb04ee7
Branches
Tags
2 merge requests!37v0.0.1,!33Resolve "process_qc"
......@@ -13,6 +13,7 @@ report_comment: >
This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a>
top_modules:
- custom_content
- fastqc:
name: 'Raw'
info: 'Replicate Raw fastq QC Results'
......@@ -35,9 +36,6 @@ top_modules:
info: 'Replicate Paired End Inner Distance Distribution Results'
path_filters:
- '*insertSize*'
- custom_content:
name: 'TIN'
info: 'Transcript Integrety Score Distribution Results'
- hisat2:
name: 'Inference: Align'
info: 'Inference Alignment (1M downsampled reads) QC Results'
......@@ -49,9 +47,41 @@ top_modules:
path_filters:
- '*infer_experiment*'
report_section_order:
rid:
order: 200
meta:
order: 100
tin:
order: -100
skip_generalstats: true
custom_data:
rid:
file_format: 'tsv'
section_name: 'RID'
description: 'This is the identifying RIDs'
plot_type: 'table'
pconfig:
id: 'rid'
headers:
Replicate RID
Experiment RID
Study RID
meta:
file_format: 'tsv'
section_name: 'Metadata'
description: 'This is the comparison of inferred metadata and submitter-provided metadata'
plot_type: 'table'
pconfig:
id: 'meta'
headers:
Source
Species
Ends
Stranded
Spike-in
tin:
file_format: 'tsv'
section_name: 'TIN'
......@@ -71,6 +101,11 @@ custom_data:
70 - 79
80 - 89
90 - 99
sp:
rid:
fn: 'rid.tsv'
meta:
fn: 'metadata.tsv'
tin:
fn: '*.tin.hist.tsv'
......@@ -223,6 +223,7 @@ endsManual.into {
endsManual_trimData
endsManual_downsampleData
endsManual_alignSampleData
endsManual_aggrQC
}
......@@ -533,16 +534,20 @@ endsInfer.into {
endsInfer_alignData
endsInfer_countData
endsInfer_dataQC
endsInfer_aggrQC
}
// Fan each inferred-metadata channel out into separately named channels so
// that more than one consumer can read the same value. The *_aggrQC targets
// are the ones read as inputs by the aggrQC process.
// NOTE(review): the consumers of the other targets (alignData, countData,
// getRef) are presumably the processes of the same name -- confirm.
strandedInfer.into {
strandedInfer_alignData
strandedInfer_countData
strandedInfer_aggrQC
}
// Spike-in inference result.
spikeInfer.into{
spikeInfer_getRef
spikeInfer_aggrQC
}
// Species inference result.
speciesInfer.into {
speciesInfer_getRef
speciesInfer_aggrQC
}
......@@ -872,7 +877,7 @@ process dataQC {
# bin TIN values
python3 ${script_tinHist} -r ${repRID}
# calculate inner-distances for PE dat
# calculate inner-distances for PE data
if [ "${ends}" == "pe" ]
then
inner_distance.py -i "${bam}" -o ${repRID}.insertSize -r ./bed/genome.bed 1>>${repRID}.dataQC.out 2>>${repRID}.dataQC.err
......@@ -896,10 +901,19 @@ process aggrQC {
path alignQC
path dedupQC
path countsQC
path tin
path innerDistance
path tin
path alignSampleQCs from alignSampleQC_aggrQC.collect()
path inferExperiment
val endsManual from endsManual_aggrQC
val endsM from endsMeta
val strandedM from strandedMeta
val spikeM from spikeMeta
val speciesM from speciesMeta
val endsI from endsInfer_aggrQC
val strandedI from strandedInfer_aggrQC
val spikeI from spikeInfer_aggrQC
val speciesI from speciesInfer_aggrQC
output:
path "${repRID}.aggrQC.{out,err}" optional true
......@@ -909,10 +923,21 @@ process aggrQC {
hostname > ${repRID}.aggrQC.err
ulimit -a >> ${repRID}.aggrQC.err
echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv
echo -e "${repRID}\t-\t-" >> rid.tsv
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-" >> metadata.tsv
# Remove the inner distance report if it is empty (SE repRID), so that
# MultiQC is not handed a blank file.
# The line count must be captured with command substitution: a pipeline
# cannot be placed directly inside `[ ... ]`. The `\$` keeps Nextflow from
# interpolating the `$(`, so bash receives it intact, while ${innerDistance}
# is still expanded by Nextflow to the staged file name.
# Reading via `<` makes wc print only the number (no file name), so no awk
# post-processing is needed.
if [ "\$(wc -l < ${innerDistance})" -eq 0 ]
then
  rm -f ${innerDistance}
fi
#run MultiQC
multiqc -c ${multiqcConfig} .
"""
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment