Skip to content
Snippets Groups Projects
Commit d3e19ff3 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add ends override

parent 1e8d161e
Branches
Tags
2 merge requests: !76 Develop, !71 Seqwho
......@@ -7,6 +7,7 @@
* Add seqwho
* Add seqwho results to multiqc report
* Modify repository structure to allow for use with XPACK-DNANEXUS
* Add override for endness
**Background**
* Add memory limit (75%) per thread for samtools sort (#108)
......
......@@ -56,12 +56,14 @@ To Run:
* `--inputBagForce` utilizes a local replicate inputBag instead of downloading from the data-hub (still requires accurate repRID input)
* eg: `--inputBagForce test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip` (must be the expected bag structure, this example will not work because it is a test bag)
* `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
* eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
* eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order; also consider using `--endsForce` if the endness doesn't match the submitted value)
* `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses a metadata mismatch or an ambiguous species error
* eg: `--speciesForce 'Mus musculus'`
* `--endsForce` forces the endness to be "se" or "pe", it bypasses a metadata mismatch error
* eg: `--endsForce 'pe'`
* `--strandedForce` forces the strandedness to be "forward", "reverse" or "unstranded", it bypasses a metadata mismatch error
* eg: `--strandedForce 'unstranded'`
* `--spikeForce` forces the spike-in to be "false" or "true", it bypasses a metadata mismatch error
* `--spikeForce` forces the spike-in to be "false" or "true", it bypasses a metadata mismatch error
* eg: `--spikeForce 'true'`
* Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
* `--ci` boolean (default = false)
......
......@@ -26,6 +26,7 @@ params.track = false
params.refSource = "biohpc"
params.inputBagForce = ""
params.fastqsForce = ""
params.endsForce = ""
params.speciesForce = ""
params.strandedForce = ""
params.spikeForce = ""
......@@ -64,6 +65,7 @@ logsDir = "${outDir}/Logs"
upload = params.upload
inputBagForce = params.inputBagForce
fastqsForce = params.fastqsForce
endsForce = params.endsForce
speciesForce = params.speciesForce
strandedForce = params.strandedForce
spikeForce = params.spikeForce
......@@ -1469,6 +1471,7 @@ process inferMetadata {
path sampledBam
path reference_inferMetadata
path script_inferMeta
val endsForce
val strandedForce
val fastqCountError from fastqCountError_inferMetadata
val fastqReadError from fastqReadError_inferMetadata
......@@ -1489,41 +1492,47 @@ process inferMetadata {
hostname > ${repRID}.inferMetadata.log
ulimit -a >> ${repRID}.inferMetadata.log
# infer experimental setting from dedup bam
echo -e "LOG: infer experimental setting from bam" >> ${repRID}.inferMetadata.log
infer_experiment.py -r ./genome.bed -i ${sampledBam} 1>> ${repRID}.infer_experiment.txt
echo -e "LOG: inferred" >> ${repRID}.inferMetadata.log
# infer experimental setting from dedup bam
echo -e "LOG: infer experimental setting from bam" >> ${repRID}.inferMetadata.log
infer_experiment.py -r ./genome.bed -i ${sampledBam} 1>> ${repRID}.infer_experiment.txt
echo -e "LOG: inferred" >> ${repRID}.inferMetadata.log
ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
if [ \${ended} == "PairEnd" ]
then
ends="pe"
percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
elif [ \${ended} == "SingleEnd" ]
then
ends="se"
percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
fi
echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
then
stranded="forward"
elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
then
stranded="reverse"
else
stranded="unstranded"
fi
echo -e "LOG: stradedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
if [ "${strandedForce}" != "" ]
then
stranded=${strandedForce}
echo -e "LOG: spike-in metadata forced: \${stranded}" >> ${repRID}.inferMetadata.log
fi
ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
if [ \${ended} == "PairEnd" ]
then
ends="pe"
percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
elif [ \${ended} == "SingleEnd" ]
then
ends="se"
percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
fi
echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
then
stranded="forward"
elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
then
stranded="reverse"
else
stranded="unstranded"
fi
echo -e "LOG: ends set to: \${ends}" >> ${repRID}.inferMetadata.log
if [ "${endsForce}" != "" ]
then
ends=${endsForce}
echo -e "LOG: ends metadata forced: \${ends}" >> ${repRID}.inferMetadata.log
fi
# log the strandedness inferred above, then apply the user override (if any)
echo -e "LOG: strandedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
if [ "${strandedForce}" != "" ]
then
  # a non-empty strandedForce replaces the inferred value; label the log line
  # as a strandedness override (it was previously mislabelled as spike-in)
  stranded=${strandedForce}
  echo -e "LOG: stranded metadata forced: \${stranded}" >> ${repRID}.inferMetadata.log
fi
# write inferred metadata to file
echo "\${ends},\${stranded},\${percentF},\${percentR},\${fail}" > infer.csv
......@@ -1632,9 +1641,16 @@ process checkMetadata {
fi
if [ "${endsMeta}" != "${endsInfer}" ]
then
pipelineError=true
pipelineError_ends=true
echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
if [ "${params.endsForce}" != "" ]
then
pipelineError=false
pipelineError_ends=false
echo -e "LOG: ends forced: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
else
pipelineError=true
pipelineError_ends=true
echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
fi
else
pipelineError_ends=false
echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment