Skip to content
Snippets Groups Projects
Commit 5e2e8f64 authored by Gervaise Henry :cowboy:
Browse files

Calculate median TIN and display in report

parent 8a404d50
Branches
Tags
2 merge requests: !37 v0.0.1, !36 Metadata output update
Pipeline #7782 failed with stages
in 2 minutes and 14 seconds
...@@ -70,7 +70,7 @@ custom_data: ...@@ -70,7 +70,7 @@ custom_data:
meta: meta:
file_format: 'tsv' file_format: 'tsv'
section_name: 'Metadata' section_name: 'Metadata'
description: 'This is the comparison of infered metadata and submitter provided' description: 'This is the comparison of infered metadata, submitter provided, and calculated'
plot_type: 'table' plot_type: 'table'
pconfig: pconfig:
id: 'meta' id: 'meta'
...@@ -80,6 +80,8 @@ custom_data: ...@@ -80,6 +80,8 @@ custom_data:
Ends Ends
Stranded Stranded
Spike-in Spike-in
Read Length
TIN
tin: tin:
file_format: 'tsv' file_format: 'tsv'
section_name: 'TIN' section_name: 'TIN'
......
...@@ -241,7 +241,7 @@ process parseMetadata { ...@@ -241,7 +241,7 @@ process parseMetadata {
readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p readLength) readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p readLength)
if [ "\${readLength}" == "nan"] if [ "\${readLength}" == "nan"]
then then
readLength="Not Entered" readLength="NA"
fi fi
echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
...@@ -317,7 +317,7 @@ process trimData { ...@@ -317,7 +317,7 @@ process trimData {
""" """
} }
// Split metadata into separate channels // Extract calculated read length metadata into channel
readLengthInfer = Channel.create() readLengthInfer = Channel.create()
inferMetadata_readLength.splitCsv(sep: ",", header: false).separate( inferMetadata_readLength.splitCsv(sep: ",", header: false).separate(
readLengthInfer readLengthInfer
...@@ -919,7 +919,8 @@ process dataQC { ...@@ -919,7 +919,8 @@ process dataQC {
val ends from endsInfer_dataQC val ends from endsInfer_dataQC
output: output:
path "${repRID}.tin.hist.tsv" into tin path "${repRID}.tin.hist.tsv" into tinHist
path "${repRID}.tin.med.csv" into tinMed
path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance
script: script:
...@@ -952,6 +953,11 @@ process dataQC { ...@@ -952,6 +953,11 @@ process dataQC {
""" """
} }
// Extract median TIN into channel
tinMedInfer = Channel.create()
tinMed.splitCsv(sep: ",", header: false).separate(
tinMedInfer,
/* /*
*aggrQC: aggregate QC from processes as well as metadata and run MultiQC *aggrQC: aggregate QC from processes as well as metadata and run MultiQC
*/ */
...@@ -968,7 +974,7 @@ process aggrQC { ...@@ -968,7 +974,7 @@ process aggrQC {
path dedupQC path dedupQC
path countsQC path countsQC
path innerDistance path innerDistance
path tin path tinHist
path alignSampleQCs from alignSampleQC_aggrQC.collect() path alignSampleQCs from alignSampleQC_aggrQC.collect()
path inferExperiment path inferExperiment
val endsManual from endsManual_aggrQC val endsManual from endsManual_aggrQC
...@@ -982,6 +988,7 @@ process aggrQC { ...@@ -982,6 +988,7 @@ process aggrQC {
val speciesI from speciesInfer_aggrQC val speciesI from speciesInfer_aggrQC
val readLengthM from readLengthMeta val readLengthM from readLengthMeta
val readLengthI from readLengthInfer val readLengthI from readLengthInfer
val tinMedI from tinMedInfer
val expRID val expRID
val studyRID val studyRID
...@@ -1000,11 +1007,11 @@ process aggrQC { ...@@ -1000,11 +1007,11 @@ process aggrQC {
# make metadata table # make metadata table
echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRead Length" > metadata.tsv echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRead Length\tTIN" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-" >> metadata.tsv echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t${readLengthM}" >> metadata.tsv echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t${readLengthM}\t-" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-\t-" >> metadata.tsv echo -e "Manual\t-\t${endsManual}\t-\t-\t-\t-" >> metadata.tsv
echo -e "Measured\t-\t-\t-\t-\t${readLengthI}" echo -e "Measured\t-\t-\t-\t-\t${readLengthI}\t${tinMedI}" >> metadata.tsv
# remove inner distance report if it is empty (SE repRID) # remove inner distance report if it is empty (SE repRID)
echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
......
...@@ -15,6 +15,7 @@ def get_args(): ...@@ -15,6 +15,7 @@ def get_args():
def main(): def main():
args = get_args() args = get_args()
tin = pd.read_csv(args.repRID + '.sorted.deduped.tin.xls',sep="\t",header=0) tin = pd.read_csv(args.repRID + '.sorted.deduped.tin.xls',sep="\t",header=0)
hist = pd.cut(tin['TIN'],bins=pd.interval_range(start=0,freq=10,end=100,closed='right')).value_counts(sort=False) hist = pd.cut(tin['TIN'],bins=pd.interval_range(start=0,freq=10,end=100,closed='right')).value_counts(sort=False)
labels = ["{0} - {1}".format(i, i + 9) for i in range(1, 100, 10)] labels = ["{0} - {1}".format(i, i + 9) for i in range(1, 100, 10)]
#labels[0] = '0 - 10' #labels[0] = '0 - 10'
...@@ -34,6 +35,7 @@ def main(): ...@@ -34,6 +35,7 @@ def main():
hist = hist.T.fillna(0.0).astype(int) hist = hist.T.fillna(0.0).astype(int)
#hist = hist.apply(lambda x: x/x.sum()*100, axis=1) #hist = hist.apply(lambda x: x/x.sum()*100, axis=1)
hist.to_csv(args.repRID + '.tin.hist.tsv',sep='\t') hist.to_csv(args.repRID + '.tin.hist.tsv',sep='\t')
tin['TIN'][(tin['TIN']!=0)].median().to_csv(args.repRID + '.tin.med.csv',sep=',')
if __name__ == '__main__': if __name__ == '__main__':
main() main()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment