Skip to content
Snippets Groups Projects
Commit 3bc88431 authored by Gervaise Henry :cowboy:
Browse files

Add metadata and repRID to multiqc report

parent fcb04ee7
Branches
Tags
2 merge requests: !37 (v0.0.1), !33 (Resolve "process_qc")
...@@ -13,6 +13,7 @@ report_comment: > ...@@ -13,6 +13,7 @@ report_comment: >
This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a> This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a>
top_modules: top_modules:
- custom_content
- fastqc: - fastqc:
name: 'Raw' name: 'Raw'
info: 'Replicate Raw fastq QC Results' info: 'Replicate Raw fastq QC Results'
...@@ -35,9 +36,6 @@ top_modules: ...@@ -35,9 +36,6 @@ top_modules:
info: 'Replicate Paired End Inner Distance Distribution Results' info: 'Replicate Paired End Inner Distance Distribution Results'
path_filters: path_filters:
- '*insertSize*' - '*insertSize*'
- custom_content:
name: 'TIN'
info: 'Transcript Integrety Score Distribution Results'
- hisat2: - hisat2:
name: 'Inference: Align' name: 'Inference: Align'
info: 'Inference Alignment (1M downsampled reads) QC Results' info: 'Inference Alignment (1M downsampled reads) QC Results'
...@@ -49,9 +47,41 @@ top_modules: ...@@ -49,9 +47,41 @@ top_modules:
path_filters: path_filters:
- '*infer_experiment*' - '*infer_experiment*'
report_section_order:
rid:
order: 200
meta:
order: 100
tin:
order: -100
skip_generalstats: true skip_generalstats: true
custom_data: custom_data:
rid:
file_format: 'tsv'
section_name: 'RID'
description: 'This is the identifying RIDs'
plot_type: 'table'
pconfig:
id: 'rid'
headers:
Replicate RID
Experiment RID
Study RID
meta:
file_format: 'tsv'
section_name: 'Metadata'
description: 'This is the comparison of infered metadata and submitter provided'
plot_type: 'table'
pconfig:
id: 'meta'
headers:
Source
Species
Ends
Stranded
Spike-in
tin: tin:
file_format: 'tsv' file_format: 'tsv'
section_name: 'TIN' section_name: 'TIN'
...@@ -71,6 +101,11 @@ custom_data: ...@@ -71,6 +101,11 @@ custom_data:
70 - 79 70 - 79
80 - 89 80 - 89
90 - 99 90 - 99
sp: sp:
rid:
fn: 'rid.tsv'
meta:
fn: 'metadata.tsv'
tin: tin:
fn: '*.tin.hist.tsv' fn: '*.tin.hist.tsv'
...@@ -223,6 +223,7 @@ endsManual.into { ...@@ -223,6 +223,7 @@ endsManual.into {
endsManual_trimData endsManual_trimData
endsManual_downsampleData endsManual_downsampleData
endsManual_alignSampleData endsManual_alignSampleData
endsManual_aggrQC
} }
...@@ -533,16 +534,20 @@ endsInfer.into { ...@@ -533,16 +534,20 @@ endsInfer.into {
endsInfer_alignData endsInfer_alignData
endsInfer_countData endsInfer_countData
endsInfer_dataQC endsInfer_dataQC
endsInfer_aggrQC
} }
strandedInfer.into { strandedInfer.into {
strandedInfer_alignData strandedInfer_alignData
strandedInfer_countData strandedInfer_countData
strandedInfer_aggrQC
} }
spikeInfer.into{ spikeInfer.into{
spikeInfer_getRef spikeInfer_getRef
spikeInfer_aggrQC
} }
speciesInfer.into { speciesInfer.into {
speciesInfer_getRef speciesInfer_getRef
speciesInfer_aggrQC
} }
...@@ -872,7 +877,7 @@ process dataQC { ...@@ -872,7 +877,7 @@ process dataQC {
# bin TIN values # bin TIN values
python3 ${script_tinHist} -r ${repRID} python3 ${script_tinHist} -r ${repRID}
# calculate inner-distances for PE dat # calculate inner-distances for PE data
if [ "${ends}" == "pe" ] if [ "${ends}" == "pe" ]
then then
inner_distance.py -i "${bam}" -o ${repRID}.insertSize -r ./bed/genome.bed 1>>${repRID}.dataQC.out 2>>${repRID}.dataQC.err inner_distance.py -i "${bam}" -o ${repRID}.insertSize -r ./bed/genome.bed 1>>${repRID}.dataQC.out 2>>${repRID}.dataQC.err
...@@ -896,10 +901,19 @@ process aggrQC { ...@@ -896,10 +901,19 @@ process aggrQC {
path alignQC path alignQC
path dedupQC path dedupQC
path countsQC path countsQC
path tin
path innerDistance path innerDistance
path tin
path alignSampleQCs from alignSampleQC_aggrQC.collect() path alignSampleQCs from alignSampleQC_aggrQC.collect()
path inferExperiment path inferExperiment
val endsManual from endsManual_aggrQC
val endsM from endsMeta
val strandedM from strandedMeta
val spikeM from spikeMeta
val speciesM from speciesMeta
val endsI from endsInfer_aggrQC
val strandedI from strandedInfer_aggrQC
val spikeI from spikeInfer_aggrQC
val speciesI from speciesInfer_aggrQC
output: output:
path "${repRID}.aggrQC.{out,err}" optional true path "${repRID}.aggrQC.{out,err}" optional true
...@@ -909,10 +923,21 @@ process aggrQC { ...@@ -909,10 +923,21 @@ process aggrQC {
hostname > ${repRID}.aggrQC.err hostname > ${repRID}.aggrQC.err
ulimit -a >> ${repRID}.aggrQC.err ulimit -a >> ${repRID}.aggrQC.err
echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv
echo -e "${repRID}\t-\t-" >> rid.tsv
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-" >> metadata.tsv
# remove inner distance report if it is empty (SE repRID)
if [ wc -l ${innerDistance} | awk '{print\${1}}' -eq 0 ] if [ wc -l ${innerDistance} | awk '{print\${1}}' -eq 0 ]
then then
rm -f ${innerDistance} rm -f ${innerDistance}
fi fi
#run MultiQC
multiqc -c ${multiqcConfig} . multiqc -c ${multiqcConfig} .
""" """
} }
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment