Skip to content
Snippets Groups Projects
Commit 5e2e8f64 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Calculate median TIN and display in report

parent 8a404d50
Branches
Tags
2 merge requests: !37 (v0.0.1), !36 (Metadata output update)
Pipeline #7782 failed with stages
in 2 minutes and 14 seconds
......@@ -70,7 +70,7 @@ custom_data:
meta:
file_format: 'tsv'
section_name: 'Metadata'
description: 'This is the comparison of infered metadata and submitter provided'
description: 'This is the comparison of inferred, submitter-provided, and calculated metadata'
plot_type: 'table'
pconfig:
id: 'meta'
......@@ -80,6 +80,8 @@ custom_data:
Ends
Stranded
Spike-in
Read Length
TIN
tin:
file_format: 'tsv'
section_name: 'TIN'
......
......@@ -241,7 +241,7 @@ process parseMetadata {
readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p readLength)
# Treat a missing read length (parsed as "nan") as not available.
# The test builtin needs whitespace before the closing bracket.
if [ "\${readLength}" == "nan" ]
then
readLength="Not Entered"
readLength="NA"
fi
echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
......@@ -317,7 +317,7 @@ process trimData {
"""
}
// Split metadata into separate channels
// Extract calculated read length metadata into channel
readLengthInfer = Channel.create()
inferMetadata_readLength.splitCsv(sep: ",", header: false).separate(
readLengthInfer
......@@ -919,7 +919,8 @@ process dataQC {
val ends from endsInfer_dataQC
output:
path "${repRID}.tin.hist.tsv" into tin
path "${repRID}.tin.hist.tsv" into tinHist
path "${repRID}.tin.med.csv" into tinMed
path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance
script:
......@@ -952,6 +953,11 @@ process dataQC {
"""
}
// Extract median TIN into channel
// Channel that receives the single median TIN value parsed from the
// header-less, comma-separated file emitted on the tinMed channel.
tinMedInfer = Channel.create()
tinMed.splitCsv(sep: ",", header: false).separate(
  tinMedInfer
)
/*
*aggrQC: aggregate QC from processes as well as metadata and run MultiQC
*/
......@@ -968,7 +974,7 @@ process aggrQC {
path dedupQC
path countsQC
path innerDistance
path tin
path tinHist
path alignSampleQCs from alignSampleQC_aggrQC.collect()
path inferExperiment
val endsManual from endsManual_aggrQC
......@@ -982,6 +988,7 @@ process aggrQC {
val speciesI from speciesInfer_aggrQC
val readLengthM from readLengthMeta
val readLengthI from readLengthInfer
val tinMedI from tinMedInfer
val expRID
val studyRID
......@@ -1000,11 +1007,11 @@ process aggrQC {
# make metadata table
echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRead Length" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t${readLengthM}" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-\t-" >> metadata.tsv
echo -e "Measured\t-\t-\t-\t-\t${readLengthI}"
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRead Length\tTIN" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t${readLengthM}\t-" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-\t-\t-" >> metadata.tsv
echo -e "Measured\t-\t-\t-\t-\t${readLengthI}\t${tinMedI}" >> metadata.tsv
# remove inner distance report if it is empty (SE repRID)
echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
......
......@@ -15,6 +15,7 @@ def get_args():
def main():
args = get_args()
tin = pd.read_csv(args.repRID + '.sorted.deduped.tin.xls',sep="\t",header=0)
hist = pd.cut(tin['TIN'],bins=pd.interval_range(start=0,freq=10,end=100,closed='right')).value_counts(sort=False)
labels = ["{0} - {1}".format(i, i + 9) for i in range(1, 100, 10)]
#labels[0] = '0 - 10'
......@@ -34,6 +35,7 @@ def main():
hist = hist.T.fillna(0.0).astype(int)
#hist = hist.apply(lambda x: x/x.sum()*100, axis=1)
hist.to_csv(args.repRID + '.tin.hist.tsv',sep='\t')
# Median of the non-zero TIN values. Series.median() returns a plain scalar, which has no .to_csv(), so wrap it in a one-element Series and write it without header or index (a single bare value on one line).
pd.Series([tin['TIN'][(tin['TIN']!=0)].median()]).to_csv(args.repRID + '.tin.med.csv',sep=',',header=False,index=False)
if __name__ == '__main__':
main()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment