Skip to content
Snippets Groups Projects
Commit 32aae481 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Change parseMeta to csv output

parent 960e0e4f
Branches
Tags
2 merge requests: !37 v0.0.1, !14 Resolve "process_createManifest"
Pipeline #5747 passed with stages
in 25 minutes and 10 seconds
#!/usr/bin/env nextflow
// Define input variables
params.deriva = "${baseDir}/../test_data/credential.json"
params.bdbag = "${baseDir}/../test_data/cookies.txt"
params.deriva = "${baseDir}/../test_data/auth/credential.json"
params.bdbag = "${baseDir}/../test_data/auth/cookies.txt"
//params.repRID = "16-1ZX4"
params.repRID = "Q-Y5JA"
......@@ -115,12 +115,7 @@ process parseMetadata {
path experimentMeta
output:
val endsMeta
val endsManual
val ends
val stranded
val spike
val specie
path 'design.csv' into metadata
script:
"""
......@@ -132,21 +127,12 @@ process parseMetadata {
echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID}.parseMetadata.err
# Get endedness metadata
endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p ends)
endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p endsMeta)
echo "LOG: endedness metadata parsed: \${endsMeta}" >>${repRID}.parseMetadata.err
# Manually get endness
endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p endsManual)
echo "LOG: endedness manually detected: \${endsManual}" >>${repRID}.parseMetadata.err
if [ '\${endsMeta}' == 'uk' ]
then
ends=\${endsManual}
echo "LOG: manual detected endness used: \${ends}" >>${repRID}.parseMetadata.err
else
ends=\${endsMeta}
echo "LOG: metadata endness used: \${ends}" >>${repRID}.parseMetadata.err
fi
# Get strandedness metadata
stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p stranded)
......@@ -157,12 +143,18 @@ process parseMetadata {
echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID}.parseMetadata.err
# Get species metadata
specie=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p specie)
echo "LOG: species metadata parsed: \${specie}" >>${repRID}.parseMetadata.err
species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p species)
echo "LOG: species metadata parsed: \${species}" >>${repRID}.parseMetadata.err
# Save design file
echo "\${rep},\${endsMeta},\${endsManual},\${stranded},\${spike},\${species}" > design.csv
"""
}
ends_trimData = ends
metadata.splitCsv(sep: ',', header: false).into {
metadata_trimData
metadata_qc
}
/*
* trimData: trims any adapter or non-host sequences from the data
......@@ -173,7 +165,7 @@ process trimData {
input:
file(fastq) from fastqs
val ends_trimData
tuple val(rep), val(endsMeta), val(endsManual), val(stranded), val(spike), val(species) from metadata_trimData
output:
path ("*.fq.gz") into fastqs_trimmed
......@@ -186,7 +178,7 @@ process trimData {
ulimit -a >>${repRID}.trimData.err
# trim fastqs
if [ '${ends_trimData}' == 'se' ]
if [ '${endsManual}' == 'se' ]
then
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err;
else
......
......@@ -17,6 +17,8 @@ def get_args():
def main():
args = get_args()
metaFile = pd.read_csv(args.metaFile,sep=",",header=0)
# Check replicate RID metadata from 'File.csv'
if (args.parameter == "repRID"):
if (len(metaFile.Replicate_RID.unique()) > 1):
print("There are multiple replicate RID's in the metadata: " + " ".join(metaFile.Replicate_RID.unique()))
......@@ -30,20 +32,26 @@ def main():
if (len(metaFile[metaFile["File_Type"] == "FastQ"]) > 2):
print("There are more then 2 fastq's in the metadata: " + " ".join(metaFile[metaFile["File_Type"] == "FastQ"].RID))
exit(1)
if (args.parameter == "ends"):
# Get endedness metadata from 'Experiment Settings.csv'
if (args.parameter == "endsMeta"):
if (metaFile.Paired_End.unique() == "Single End"):
ends = "se"
endsMeta = "se"
elif (metaFile.Paired_End.unique() == "Paired End"):
ends = "pe"
endsMeta = "pe"
else:
ends = "uk"
print(ends)
endsMeta = "uk"
print(endsMeta)
# Manually get endness count from 'File.csv'
if (args.parameter == "endsManual"):
if (len(metaFile[metaFile["File_Type"] == "FastQ"]) == 1):
endsManual = "se"
elif (len(metaFile[metaFile["File_Type"] == "FastQ"]) == 2):
endsManual = "pe"
print(endsManual)
# Get strandedness metadata from 'Experiment Settings.csv'
if (args.parameter == "stranded"):
if (metaFile.Has_Strand_Specific_Information.unique() == "yes"):
stranded = "stranded"
......@@ -53,6 +61,8 @@ def main():
print("Stranded metadata not match expected options: " + metaFile.Has_Strand_Specific_Information.unique())
exit(1)
print(stranded)
# Get spike-in metadata from 'Experiment Settings.csv'
if (args.parameter == "spike"):
if (metaFile.Used_Spike_Ins.unique() == "yes"):
spike = "yes"
......@@ -62,15 +72,17 @@ def main():
print("Spike-ins metadata not match expected options: " + metaFile.Used_Spike_Ins.unique())
exit(1)
print(spike)
if (args.parameter == "specie"):
# Get species metadata from 'Experiment.csv'
if (args.parameter == "species"):
if (metaFile.Species.unique() == "Mus musculus"):
specie = "Mus musculus"
species = "Mus musculus"
elif (metaFile.Species.unique() == "Homo sapiens"):
specie = "Homo sapiens"
species = "Homo sapiens"
else:
print("Species metadata not match expected options: " + metaFile.Species.unique())
exit(1)
print(specie)
print(species)
if __name__ == '__main__':
main()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment