Skip to content
Snippets Groups Projects
Commit 59cb654a authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add force params for stranded and spike

parent 0511ac1a
Branches
Tags
2 merge requests!68Develop,!67Resolve "Change Single Read to Single End for submitted endness"
Pipeline #9122 failed with stages
in 1 minute and 12 seconds
......@@ -660,11 +660,11 @@ integration_se:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source dev --upload true -with-dag dag.png --dev false --ci true --email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./SE_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -684,11 +684,11 @@ integration_pe:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source dev --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./PE_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -710,11 +710,11 @@ failAmbiguousSpecies:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source dev --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./failAmbiguousSpecies_report.html
retry:
max: 0
when:
......@@ -725,11 +725,11 @@ failTrunkation:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source dev --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./failTrunkation_report.html
retry:
max: 0
when:
......@@ -740,11 +740,11 @@ failMismatchR1R2:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source dev --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./failMismatchR1R2_report.html
retry:
max: 0
when:
......@@ -755,11 +755,11 @@ failUnexpectedMeta:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source dev --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./failUnexpectedMeta_report.html
retry:
max: 0
when:
......@@ -770,11 +770,11 @@ failFileStructure:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source dev --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source dev --upload true -with-dag dag.png --dev false --ci true -with-report ./failFileStructure_report.html
retry:
max: 0
when:
......@@ -785,11 +785,11 @@ override_inputBag:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source dev --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source dev --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true -with-report ./inputBagOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -807,11 +807,11 @@ override_fastq:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source dev --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source dev --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true -with-report ./fastqOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -829,11 +829,11 @@ override_species:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source dev --speciesForce 'Homo sapiens' --upload false --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source dev --speciesForce 'Homo sapiens' --upload false --dev false --ci true -with-report ./speciesOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -846,6 +846,50 @@ override_species:
when:
- always
# Integration test: runs the pipeline on replicate Q-Y5ER with the strandedness
# forced to "unstranded" through --strandedForce (no upload).
override_stranded:
  stage: integration
  # Runs for merge requests only, except when the target branch matches /master/.
  only:
    - merge_requests
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source dev --strandedForce unstranded --upload false --dev false --ci true -with-report ./strandedOverride_report.html
    # Copy the MultiQC data file found in the work tree to a fixed name for the artifact below.
    - find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_PE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - strandedOverride_PE_multiqc_data.json
    expire_in: 7 days
  # No automatic retries for this job.
  retry:
    max: 0
    when:
      - always
# Integration test: runs the pipeline on replicate Q-Y5ER with the spike-in
# value forced to "t" through --spikeForce (no upload).
# NOTE(review): the job name looks like a typo for "override_spike"; it is kept
# unchanged here in case other configuration refers to it - confirm and rename.
override_stpike:
  stage: integration
  # Runs for merge requests only, except when the target branch matches /master/.
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source dev --spikeForce t --upload false --dev false --ci true -with-report ./spikeOverride_report.html
    # Copy the MultiQC data file found in the work tree to a fixed name for the artifact below.
    - find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_PE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      # Must match the file name written by the find/cp step above
      # (previously "spikedOverride_...", which the script never creates).
      - spikeOverride_PE_multiqc_data.json
    expire_in: 7 days
  # No automatic retries for this job.
  retry:
    max: 0
    when:
      - always
consistency:
stage: consistency
......
......@@ -3,6 +3,7 @@
* Endness metadata "Single Read" changed to "Single End" in data-hub, pipeline updated to handle (#110) ("Single Read" still acceptable for backwards compatibility)
* Strandedness metadata "yes"/"no" changed to boolean "t"/"f" in data-hub, pipeline updated to handle (#70) ("yes"/"no" still acceptable for backwards compatibility)
* Upload empty mRNA_QC entry if data error (#111)
* Allow forcing of strandedness and spike (#100)
**Background**
* Add memory limit (75%) per thread for samtools sort (#108)
......
......@@ -57,8 +57,12 @@ To Run:
* eg: `--inputBagForce test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip` (must be the expected bag structure, this example will not work because it is a test bag)
* `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
* eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
* `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses ambiguous species error
* `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses a metadata mismatch or an ambiguous species error
* eg: `--speciesForce 'Mus musculus'`
* `--strandedForce` forces the strandedness to be "forward", "reverse" or "unstranded", it bypasses a metadata mismatch error
* eg: `--strandedForce 'unstranded'`
* `--spikeForce` forces the spike-in to be "f" or "t", it bypasses a metadata mismatch error
* eg: `--spikeForce 't'`
* Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
* `--ci` boolean (default = false)
* `--dev` boolean (default = true)
......
......@@ -27,6 +27,8 @@ params.refSource = "biohpc"
params.inputBagForce = ""
params.fastqsForce = ""
params.speciesForce = ""
params.strandedForce = ""
params.spikeForce = ""
// Define tracking input variables
params.ci = false
......@@ -64,6 +66,8 @@ upload = params.upload
// Copy the override ("force") parameters and the email parameter into script-level variables.
// Each variable reads the parameter of the same name; strandedForce and spikeForce
// are consumed as `val` inputs by the parseMetadata process.
inputBagForce = params.inputBagForce
fastqsForce = params.fastqsForce
speciesForce = params.speciesForce
strandedForce = params.strandedForce
spikeForce = params.spikeForce
email = params.email
// Define fixed files and variables
......@@ -311,6 +315,8 @@ process parseMetadata {
path experiment from experimentMeta
path (fastq) from fastqs_parseMetadata.collect()
val fastqCount
val strandedForce
val spikeForce
output:
path "design.csv" into metadata_fl
......@@ -364,10 +370,20 @@ process parseMetadata {
# get strandedness metadata
stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p stranded)
echo -e "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log
# the force value is a Nextflow input, so it is interpolated (not escaped) here;
# a non-empty value replaces the parsed strandedness
if [ "${strandedForce}" != "" ]
then
  stranded="${strandedForce}"
  echo -e "LOG: strandedness metadata forced: \${stranded}" >> ${repRID}.parseMetadata.log
fi
# get spike-in metadata
spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike)
echo -e "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log
# same override mechanism for the spike-in value
if [ "${spikeForce}" != "" ]
then
  spike="${spikeForce}"
  echo -e "LOG: spike-in metadata forced: \${spike}" >> ${repRID}.parseMetadata.log
fi
if [ "\${spike}" == "f" ]
then
spike="false"
......@@ -1170,51 +1186,68 @@ process checkMetadata {
pipelineError=false
# check if submitted metadata matches inferred
if [ "${endsMeta}" != "${endsInfer}" ]
then
pipelineError=true
pipelineError_ends=true
echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
else
pipelineError_ends=false
echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
fi
if [ "${strandedMeta}" != "${strandedInfer}" ]
then
pipelineError=true
pipelineError_stranded=true
if [ "${strandedMeta}" == "stranded" ]
if [ "${params.strandedForce}" != "" ]
then
if [[ "${strandedInfer}" == "forward" ]] || [[ "${strandedInfer}" == "reverse" ]]
pipelineError=false
echo -e "LOG: stranded forced: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
else
pipelineError=true
pipelineError_stranded=true
if [ "${strandedMeta}" == "stranded" ]
then
pipelineError=false
pipelineError_stranded=false
echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
if [[ "${strandedInfer}" == "forward" ]] || [[ "${strandedInfer}" == "reverse" ]]
then
pipelineError=false
pipelineError_stranded=false
echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
else
echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
fi
else
echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
fi
else
echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
fi
else
pipelineError=false
pipelineError_stranded=false
echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
fi
if [ "${spikeMeta}" != "${spikeInfer}" ]
if [ "${endsMeta}" != "${endsInfer}" ]
then
pipelineError=true
pipelineError_spike=true
echo -e "LOG: spike does not match: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
pipelineError_ends=true
echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
else
pipelineError_ends=false
echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
fi
if [ "${spikeMeta}" != "${spikeInfer}" ]
then
if [[ "${params.spikeForce}" != "" ]]
then
pipelineError_spike=false
echo -e "LOG: spike forced: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
else
pipelineError=true
pipelineError_spike=true
echo -e "LOG: spike does not match: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
fi
else
pipelineError_spike=false
echo -e "LOG: stranded matches: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
echo -e "LOG: spike matches: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
fi
if [ "${speciesMeta}" != "${speciesInfer}" ]
then
pipelineError=true
pipelineError_species=true
echo -e "LOG: species does not match: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
if [[ "${params.speciesForce}" != "" ]]
then
pipelineError_species=false
echo -e "LOG: species forced: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
else
pipelineError=true
pipelineError_species=true
echo -e "LOG: species does not match: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
fi
else
pipelineError_species=false
echo -e "LOG: species matches: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
......@@ -1935,7 +1968,7 @@ process aggrQC {
ulimit -a >> ${repRID}.aggrQC.log
# make run table
# the run is labelled "default" only when none of the force parameters is set
if [ "${params.inputBagForce}" == "" ] && [ "${params.fastqsForce}" == "" ] && [ "${params.speciesForce}" == "" ] && [ "${params.strandedForce}" == "" ] && [ "${params.spikeForce}" == "" ]
then
input="default"
else
......@@ -1952,6 +1985,14 @@ process aggrQC {
then
input=\$(echo \${input} species)
fi
if [ "${params.strandedForce}" != "" ]
then
input=\$(echo \${input} stranded)
fi
if [ "${params.spikeForce}" != "" ]
then
input=\$(echo \${input} spike)
fi
fi
echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log
echo -e "Session\tSession ID\tStart Time\tPipeline Version\tInput" > run.tsv
......@@ -1969,10 +2010,24 @@ process aggrQC {
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t'${readLengthM}'\t-" >> metadata.tsv
# build the "Inferred" row of metadata.tsv field by field; a field whose value
# was forced on the command line is tagged with "(FORCED)"
# (a dedicated variable is used so the run-table "input" value is not overwritten,
#  and every expansion is quoted so the tab separators survive)
if [ "${params.speciesForce}" == "" ]
then
  inferred="Inferred\t${speciesI}"
else
  inferred="Inferred\t${speciesI} (FORCED)"
fi
inferred="\${inferred}\t${endsI}"
if [ "${params.strandedForce}" == "" ]
then
  inferred="\${inferred}\t${strandedI}"
else
  inferred="\${inferred}\t${strandedI} (FORCED)"
fi
if [ "${params.spikeForce}" == "" ]
then
  inferred="\${inferred}\t${spikeI}\t-\t-\t-\t-"
else
  inferred="\${inferred}\t${spikeI} (FORCED)\t-\t-\t-\t-"
fi
echo -e "\${inferred}" >> metadata.tsv
echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv
# make reference table
......@@ -2231,11 +2286,11 @@ process uploadOutputBag {
echo LOG: output bag RID uploaded - \${outputBag_rid} >> ${repRID}.uploadOutputBag.log
rid=\${outputBag_rid}
else
exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
exist=\${exist:8:-6}
outputBag_rid=\$(python3 ${script_uploadOutputBag} -e ${executionRunRID} -o ${source} -c \${cookie} -u \${exist})
echo LOG: output bag RID already exists - \${exist} >> ${repRID}.uploadOutputBag.log
rid=\${exist}
exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
exist=\${exist:8:-6}
outputBag_rid=\$(python3 ${script_uploadOutputBag} -e ${executionRunRID} -o ${source} -c \${cookie} -u \${exist})
echo LOG: output bag RID already exists - \${exist} >> ${repRID}.uploadOutputBag.log
rid=\${exist}
fi
echo "\${rid}" > outputBagRID.csv
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment