Skip to content
Snippets Groups Projects
Commit 79f7c244 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add new custom tables to multiqc #72

parent d5c23e26
Branches
Tags
2 merge requests!43Develop,!420.0.3
# v0.0.2 (in development) # v0.0.3 (in development)
**User Facing**
* TPM table:
* Add Ensembl Gene ID
* Rename columns: *GENCODE_Gene_Symbol*, *Ensembl_GeneID*, *NCBI_GeneID*
* MultiQC output custom tables (html+JSON):
* Run table: *Session ID* and *Pipeline Version*
* Reference Table: *Species*, *Genome Reference Consortium Build*, *Genome Reference Consortium Patch*, *GENCODE Annotation Release* (outputs both human and mouse versions)
**Background**
* Add GeneSymbol/EnsemblID/EntrezID translation files to references
**Known Bugs**
* outputBag does not contain fetch for processed data
* Does not include automatic data upload
<hr>
# v0.0.2
**User Facing** **User Facing**
* Output: * Output:
* inputBag * inputBag
......
...@@ -48,14 +48,28 @@ top_modules: ...@@ -48,14 +48,28 @@ top_modules:
- '*infer_experiment*' - '*infer_experiment*'
report_section_order: report_section_order:
run:
order: 4000
rid: rid:
order: 2000 order: 3000
meta: meta:
order: 2000
ref:
order: 1000 order: 1000
skip_generalstats: true skip_generalstats: true
custom_data: custom_data:
run:
file_format: 'tsv'
section_name: 'Run'
description: 'This is the run information'
plot_type: 'table'
pconfig:
id: 'run'
format: '{:,.0f}'
headers:
Session ID
rid: rid:
file_format: 'tsv' file_format: 'tsv'
section_name: 'RID' section_name: 'RID'
...@@ -74,6 +88,7 @@ custom_data: ...@@ -74,6 +88,7 @@ custom_data:
plot_type: 'table' plot_type: 'table'
pconfig: pconfig:
id: 'meta' id: 'meta'
scale: false
format: '{:,.0f}' format: '{:,.0f}'
headers: headers:
Source Source
...@@ -85,6 +100,21 @@ custom_data: ...@@ -85,6 +100,21 @@ custom_data:
Assigned Reads Assigned Reads
Median Read Length Median Read Length
Median TIN Median TIN
Pipeline Version
ref:
file_format: 'tsv'
section_name: 'Reference'
description: 'This is the referenec version information'
plot_type: 'table'
pconfig:
id: 'ref'
scale: false
format: '{}'
headers:
Species
Genome Reference Consortium Build
Genome Reference Consortium Patch
GENCODE Annotation Release"
tin: tin:
file_format: 'tsv' file_format: 'tsv'
section_name: 'TIN' section_name: 'TIN'
...@@ -106,9 +136,13 @@ custom_data: ...@@ -106,9 +136,13 @@ custom_data:
90 - 99 90 - 99
sp: sp:
run:
fn: "run.tsv"
rid: rid:
fn: 'rid.tsv' fn: 'rid.tsv'
meta: meta:
fn: 'metadata.tsv' fn: 'metadata.tsv'
ref:
fn: 'reference.tsv'
tin: tin:
fn: '*.tin.hist.tsv' fn: '*.tin.hist.tsv'
\ No newline at end of file
...@@ -97,6 +97,6 @@ manifest { ...@@ -97,6 +97,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq' homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.' description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
mainScript = 'rna-seq.nf' mainScript = 'rna-seq.nf'
version = 'v0.0.2_indev' version = 'v0.0.3_indev'
nextflowVersion = '>=19.09.0' nextflowVersion = '>=19.09.0'
} }
...@@ -1034,6 +1034,11 @@ process aggrQC { ...@@ -1034,6 +1034,11 @@ process aggrQC {
hostname > ${repRID}.aggrQC.log hostname > ${repRID}.aggrQC.log
ulimit -a >> ${repRID}.aggrQC.log ulimit -a >> ${repRID}.aggrQC.log
# make run table
echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log
echo -e "Session ID\tPipeline Version" > run.tsv
echo -e "${workflow.sessionId}\t${workflow.manifest.version}" >> run.tsv
# make RID table # make RID table
echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log
echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv
...@@ -1046,6 +1051,12 @@ process aggrQC { ...@@ -1046,6 +1051,12 @@ process aggrQC {
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv
# make reference table
echo -e "LOG: creating referencerun table" >> ${repRID}.aggrQC.log
echo -e "Species\tGenome Reference Consortium Build\tGenome Reference Consortium Patch\tGENCODE Annotation Release" > reference.tsv
echo -e "Human\tGRCh\$(echo `echo ${params.refHuVersion} | cut -d "." -f 1`)\t\$(echo `echo ${params.refHuVersion} | cut -d "." -f 2`)\t'\$(echo `echo ${params.refHuVersion} | cut -d "." -f 3 | sed "s/^v//"`)'" >> reference.tsv
echo -e "Mouse\tGRCm\$(echo `echo ${params.refMoVersion} | cut -d "." -f 1`)\t\$(echo `echo ${params.refMoVersion} | cut -d "." -f 2`)\t'\$(echo `echo ${params.refMoVersion} | cut -d "." -f 3 | sed "s/^v//"`)'" >> reference.tsv
# remove inner distance report if it is empty (SE repRID) # remove inner distance report if it is empty (SE repRID)
echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
if [ "${endsM}" == "se" ] if [ "${endsM}" == "se" ]
...@@ -1081,5 +1092,4 @@ process outputBag { ...@@ -1081,5 +1092,4 @@ process outputBag {
cp ${multiqcJSON} Replicate_${repRID}.outputBag cp ${multiqcJSON} Replicate_${repRID}.outputBag
bdbag Replicate_${repRID}.outputBag --archiver zip bdbag Replicate_${repRID}.outputBag --archiver zip
""" """
} }
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment