Skip to content
Snippets Groups Projects
Commit 10cc87d9 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '75-no.upload' into '11-deriva.upload'

Resolve "Add an option to not upload"

See merge request !54
parents e253651f 8792bf4b
Branches
Tags
3 merge requests!58Develop,!54Resolve "Add an option to not upload",!53Resolve "process_derivaUpload"
Pipeline #8535 failed with stages
in 1 hour, 9 minutes, and 34 seconds
......@@ -494,7 +494,7 @@ integration_se:
- SE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -519,7 +519,7 @@ integration_pe:
- PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -533,7 +533,7 @@ override_inputBag:
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/staging/Replicate_Q-Y5F6.zip --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/staging/Replicate_Q-Y5F6.zip --upload false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -542,7 +542,7 @@ override_inputBag:
- inputBagOverride_PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -555,7 +555,7 @@ override_fastq:
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -564,7 +564,7 @@ override_fastq:
- fastqOverride_PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -577,7 +577,7 @@ override_species:
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --upload false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -586,7 +586,7 @@ override_species:
- speciesOverride_PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......
......@@ -7,6 +7,7 @@
* Upload execution run
* Upload mRNA QC
* Create and upload output bag
* Add option to not upload
**Background**
* Remove (comment out) option to pull references from S3
......
......@@ -34,9 +34,12 @@ To Run:
* **dev** = [dev.gudmap.org](https://dev.gudmap.org) (default, does not contain all data)
* **staging** = [staging.gudmap.org](https://staging.gudmap.org) (does not contain all data)
* **production** = [www.gudmap.org](https://www.gudmap.org) (***does contain all data***)
* `--refMoVersion` mouse reference version ***(optional)***
* `--refHuVersion` human reference version ***(optional)***
* `--refERCCVersion` human reference version ***(optional)***
* `--refMoVersion` mouse reference version ***(optional, default = 38.p6.vM22)***
* `--refHuVersion` human reference version ***(optional, default = 38.p12.v31)***
* `--refERCCVersion` ERCC reference version ***(optional, default = 92)***
* `--upload` whether to upload outputs back to the data-hub ***(optional, default = true)***
* **true** = upload outputs to the data-hub
* **false** = do *NOT* upload outputs to the data-hub
* `-profile` config profile to use ***(optional)***:
* default = processes on BioHPC cluster
* **biohpc** = process on BioHPC cluster
......
{
"bag": {
"bag_name": "Replicate_{rid}",
"bag_name": "{rid}_inputBag",
"bag_algorithms": [
"md5"
],
......
......@@ -18,6 +18,7 @@ params.refMoVersion = "38.p6.vM22"
params.refHuVersion = "38.p12.v31"
params.refERCCVersion = "92"
params.outDir = "${baseDir}/../output"
params.upload = true
params.email = ""
......@@ -51,6 +52,7 @@ refHuVersion = params.refHuVersion
refERCCVersion = params.refERCCVersion
outDir = params.outDir
logsDir = "${outDir}/Logs"
upload = params.upload
inputBagForce = params.inputBagForce
fastqsForce = params.fastqsForce
speciesForce = params.speciesForce
......@@ -155,7 +157,7 @@ process getBag {
path replicateExportConfig
output:
path ("Replicate_*.zip") into bag
path ("*.zip") into bag
when:
inputBagForce == ""
......@@ -220,7 +222,7 @@ process getData {
echo -e "LOG: linked" >> ${repRID}.getData.log
# get bag basename
replicate=\$(basename "${inputBag}" | cut -d "." -f1)
replicate=\$(basename "${inputBag}" | cut -d "_" -f1)
echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log
# unzip bag
......@@ -1284,6 +1286,9 @@ process uploadInputBag {
output:
path ("inputBagRID.csv") into inputBagRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadInputBag.log
......@@ -1350,6 +1355,9 @@ process uploadExecutionRun {
output:
path ("executionRunRID.csv") into executionRunRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadExecutionRun.log
......@@ -1431,6 +1439,9 @@ process uploadQC {
output:
path ("qcRID.csv") into qcRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadQC.log
......@@ -1469,7 +1480,6 @@ qcRID_fl.splitCsv(sep: ",", header: false).separate(
qcRID
)
/*
*outputBag: create outputBag
*/
......@@ -1493,40 +1503,27 @@ process outputBag {
output:
path ("${repRID}_Output_Bag.zip") into outputBag
when:
upload
script:
"""
hostname > ${repRID}.outputBag.log
ulimit -a >> ${repRID}.outputBag.log
mkdir -p ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/
cp ${bam} ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/
cp ${bigwig} ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/
cp ${counts} ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/
mkdir -p ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${bam} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${bai} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${bigwig} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${counts} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:20:-1}
deriva-upload-cli --catalog 2 --token \${cookie} ${source} ./deriva --purge-state
fileBam=\$(basename -a ${bam})
md5Bam=\$(md5sum ./\${fileBam} | awk '{ print \$1 }')
fileBigwig=\$(basename -a ${bigwig})
md5Bigwig=\$(md5sum ./\${fileBigwig} | awk '{ print \$1 }')
fileCounts=\$(basename -a ${counts})
md5Counts=\$(md5sum ./\${fileCounts} | awk '{ print \$1 }')
urlBam=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/File_MD5=\${md5Bam})
urlBam=\$(echo \${urlBam} | grep -o '\\"File_URL\\":\\".*\\",\\"File_Name')
urlBam=\${urlBam:12:-12}
urlBigwig=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/File_MD5=\${md5Bigwig})
urlBigwig=\$(echo \${urlBigwig} | grep -o '\\"File_URL\\":\\".*\\",\\"File_Name')
urlBigwig=\${urlBigwig:12:-12}
urlCounts=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/File_MD5=\${md5Counts})
urlCounts=\$(echo \${urlCounts} | grep -o '\\"File_URL\\":\\".*\\",\\"File_Name')
urlCounts=\${urlCounts:12:-12}
echo \${urlBam} > url.txt
echo \${urlBigwig} >> url.txt
echo \${urlCounts} >> url.txt
deriva-upload-cli --catalog 2 --token \${cookie} ${source} ./deriva
echo LOG: processed files uploaded >> ${repRID}.outputBag.log
deriva-download-cli --catalog 2 --token \${cookie} ${source} ${executionRunExportConfig} . rid=${executionRunRID}
echo LOG: execution run bag downloaded >> ${repRID}.outputBag.log
echo -e "### Run Details" >> runDetails.md
echo -e "**Workflow URL:** https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq" >> runDetails.md
......@@ -1544,6 +1541,7 @@ process outputBag {
echo -e "**Genome Assembly Version:** \${genome} patch \${patch}" >> runDetails.md
echo -e "**Annotation Version:** GENCODE release \${annotation}" >> runDetails.md
echo -e "**Run ID:** ${repRID}" >> runDetails.md
echo LOG: runDetails.md created >> ${repRID}.outputBag.log
unzip Execution_Run_${executionRunRID}.zip
mv Execution_Run_${executionRunRID} ${repRID}_Output_Bag
......@@ -1554,6 +1552,7 @@ process outputBag {
cp ${multiqcJSON} \${loc}
bdbag ./${repRID}_Output_Bag/ --update --archiver zip --debug
echo LOG: output bag created >> ${repRID}.outputBag.log
"""
}
......@@ -1572,6 +1571,9 @@ process uploadOutputBag {
output:
path ("outputBagRID.csv") into outputBagRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadOutputBag.log
......
......@@ -2,7 +2,7 @@
if [ -z "${3}" ]
then
bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz ${1}
bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz ${1}_inputBag
for i in $(find */ -name "*R*.fastq.gz")
do
path=${2}.$(echo ${i##*/} | grep -o "R[1,2].fastq.gz")
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment