diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml index 934ee6aeb2b47a69dc5cae48fca8a6ca2132888d..a58fd6eb08ca64d17d40d3abae7b52f074be7feb 100644 --- a/workflow/conf/multiqc_config.yaml +++ b/workflow/conf/multiqc_config.yaml @@ -13,6 +13,7 @@ report_comment: > This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a> top_modules: + - custom_content - fastqc: name: 'Raw' info: 'Replicate Raw fastq QC Results' @@ -35,9 +36,6 @@ top_modules: info: 'Replicate Paired End Inner Distance Distribution Results' path_filters: - '*insertSize*' - - custom_content: - name: 'TIN' - info: 'Transcript Integrety Score Distribution Results' - hisat2: name: 'Inference: Align' info: 'Inference Alignment (1M downsampled reads) QC Results' @@ -49,9 +47,41 @@ top_modules: path_filters: - '*infer_experiment*' +report_section_order: + rid: + order: 200 + meta: + order: 100 + tin: + order: -100 + skip_generalstats: true custom_data: + rid: + file_format: 'tsv' + section_name: 'RID' + description: 'These are the identifying RIDs' + plot_type: 'table' + pconfig: + id: 'rid' + headers: + Replicate RID + Experiment RID + Study RID + meta: + file_format: 'tsv' + section_name: 'Metadata' + description: 'This is the comparison of inferred and submitter-provided metadata' + plot_type: 'table' + pconfig: + id: 'meta' + headers: + Source + Species + Ends + Stranded + Spike-in tin: file_format: 'tsv' section_name: 'TIN' @@ -71,6 +101,11 @@ custom_data: 70 - 79 80 - 89 90 - 99 + sp: + rid: + fn: 'rid.tsv' + meta: + fn: 'metadata.tsv' tin: fn: '*.tin.hist.tsv' diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index b6f7d0e28ea6747bca57c6b684ed4831f75f02e1..2d311a42fa17b47f2384ab1eaa3da99acf5b0a5c 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -223,6 +223,7 @@ endsManual.into { endsManual_trimData endsManual_downsampleData endsManual_alignSampleData + endsManual_aggrQC } @@ -533,16 +534,20 @@ 
endsInfer.into { endsInfer_alignData endsInfer_countData endsInfer_dataQC + endsInfer_aggrQC } strandedInfer.into { strandedInfer_alignData strandedInfer_countData + strandedInfer_aggrQC } spikeInfer.into{ spikeInfer_getRef + spikeInfer_aggrQC } speciesInfer.into { speciesInfer_getRef + speciesInfer_aggrQC } @@ -872,7 +877,7 @@ process dataQC { # bin TIN values python3 ${script_tinHist} -r ${repRID} - # calculate inner-distances for PE dat + # calculate inner-distances for PE data if [ "${ends}" == "pe" ] then inner_distance.py -i "${bam}" -o ${repRID}.insertSize -r ./bed/genome.bed 1>>${repRID}.dataQC.out 2>>${repRID}.dataQC.err @@ -896,10 +901,19 @@ process aggrQC { path alignQC path dedupQC path countsQC - path tin path innerDistance + path tin path alignSampleQCs from alignSampleQC_aggrQC.collect() path inferExperiment + val endsManual from endsManual_aggrQC + val endsM from endsMeta + val strandedM from strandedMeta + val spikeM from spikeMeta + val speciesM from speciesMeta + val endsI from endsInfer_aggrQC + val strandedI from strandedInfer_aggrQC + val spikeI from spikeInfer_aggrQC + val speciesI from speciesInfer_aggrQC output: path "${repRID}.aggrQC.{out,err}" optional true @@ -909,10 +923,21 @@ process aggrQC { hostname > ${repRID}.aggrQC.err ulimit -a >> ${repRID}.aggrQC.err + echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv + echo -e "${repRID}\t-\t-" >> rid.tsv + + echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in" > metadata.tsv + echo -e "Inferred\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}" >> metadata.tsv + echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}" >> metadata.tsv + echo -e "Manual\t-\t${endsManual}\t-\t-" >> metadata.tsv + + # remove inner distance report if it is empty (SE repRID) - if [ wc -l ${innerDistance} | awk '{print\${1}}' -eq 0 ] + if [ "\$(wc -l < ${innerDistance})" -eq 0 ] then rm -f ${innerDistance} fi + + #run MultiQC multiqc -c ${multiqcConfig} . """ }