Skip to content
Snippets Groups Projects
Commit e32cb502 authored by Gervaise Henry :cowboy:
Browse files

Parse metadata

parent 710a67d9
Branches
Tags
2 merge requests: !37 (v0.0.1), !14 (Resolve "process_createManifest")
Pipeline #5727 failed with stages in 27 minutes and 57 seconds
......@@ -21,7 +21,7 @@ process {
container = 'bicf/trimgalore:1.1'
}
withName:parseMetadata {
container = 'bicf/python:1.3'
container = 'bicf/python3:1.3'
}
}
......
......@@ -97,23 +97,49 @@ process getData {
*/
process parseMetadata {
tag "${repRID_parseMetadata}"
publishDir "${logsDir}/parseMetadata", mode: 'symlink', pattern: "${repRID_parseMetadata}.parseMetadata.err"
publishDir "${logsDir}", mode: 'copy', pattern: "${repRID_parseMetadata}.parseMetadata.err"
input:
val repRID_parseMetadata
file fileMeta
file experimentSettingsMeta
file experimentMeta
path fileMeta
path experimentSettingsMeta
path experimentMeta
output:
val ends
val stranded
val spike
val specie
script:
"""
hostname >>${repRID_parseMetadata}.parseMetadata.err
ulimit -a >>${repRID_parseMetadata}.parseMetadata.err
python ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m ${fileMeta} -p repRID
# Check replicate RID metadata
rep=\$(python ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${fileMeta}" -p repRID)
echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID_parseMetadata}.parseMetadata.err
# Get endedness metadata
ends=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentSettingsMeta}" -p ends)
echo "LOG: endedness metadata parsed: \${ends}" >>${repRID_parseMetadata}.parseMetadata.err
# Get strandedness metadata
stranded=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentSettingsMeta}" -p stranded)
echo "LOG: strandedness metadata parsed: \${stranded}" >>${repRID_parseMetadata}.parseMetadata.err
# Get spike-in metadata
spike=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentSettingsMeta}" -p spike)
echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID_parseMetadata}.parseMetadata.err
# Get species metadata
specie=\$(python3 ${baseDir}/scripts/parseMeta.py -r ${repRID_parseMetadata} -m "${experimentMeta}" -p specie)
echo "LOG: species metadata parsed: \${specie}" >>${repRID_parseMetadata}.parseMetadata.err
"""
}
ends.set {
ends_trimData
}
/*
......@@ -126,10 +152,10 @@ process trimData {
input:
val repRID_trimData
file(fastq) from fastqs
val ends_trimData
output:
path ("*.fq.gz") into fastqs_trimmed
val ends
file ("${repRID_trimData}.trimData.log")
file ("${repRID_trimData}.trimData.err")
......@@ -141,12 +167,10 @@ process trimData {
else
ncore=`nproc`
fi
if [ '${fastq[1]}' == 'null' ]
if [ '${ends_trimData}' == 'se' ]
then
ends='se'
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID_trimData} -j \${ncore} ${fastq[0]} 1>>${repRID_trimData}.trimData.log 2>>${repRID_trimData}.trimData.err;
else
ends='pe'
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID_trimData} -j \${ncore} ${fastq[0]} ${fastq[1]} 1>>${repRID_trimData}.trimData.log 2>>${repRID_trimData}.trimData.err;
fi
"""
......
......@@ -23,11 +23,47 @@ def main():
print("Replicate RID in metadata does not match run parameters: " + metaFile.Replicate_RID.unique() + " vs " + args.repRID)
exit(1)
else:
print(metaFile["Replicate_RID"].unique())
rep=metaFile["Replicate_RID"].unique()[0]
print(rep)
if (len(metaFile[metaFile["File_Type"] == "FastQ"]) > 2):
print("There are more then 2 fastq's in the metadata: " + " ".join(metaFile[metaFile["File_Type"] == "FastQ"].RID))
exit(1)
if (args.parameter == "ends"):
if (metaFile.Paired_End.unique() == "Single End"):
ends = "se"
elif (metaFile.Paired_End.unique() == "Paired End"):
ends = "pe"
else:
print("Ends metadata not match expected options: " + metaFile.Paired_End.unique())
exit(1)
print(ends)
if (args.parameter == "stranded"):
if (metaFile.Has_Strand_Specific_Information.unique() == "yes"):
stranded = "stranded"
elif (metaFile.Has_Strand_Specific_Information.unique() == "no"):
stranded = "unstranded"
else:
print("Stranded metadata not match expected options: " + metaFile.Has_Strand_Specific_Information.unique())
exit(1)
print(stranded)
if (args.parameter == "spike"):
if (metaFile.Used_Spike_Ins.unique() == "yes"):
spike = "yes"
elif (metaFile.Used_Spike_Ins.unique() == "no"):
spike = "no"
else:
print("Spike-ins metadata not match expected options: " + metaFile.Used_Spike_Ins.unique())
exit(1)
print(spike)
if (args.parameter == "specie"):
if (metaFile.Species.unique() == "Mus musculus"):
specie = "Mus musculus"
elif (metaFile.Species.unique() == "Homo sapiens"):
specie = "Homo sapiens"
else:
print("Species metadata not match expected options: " + metaFile.Species.unique())
exit(1)
print(specie)
if __name__ == '__main__':
main()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment