diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 90ed079cb250641b0d7748af0c45bb1ca9836b6b..29b3c613783535875c9c5bc5e5cc75c6a0655d06 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -494,7 +494,7 @@ integration_se: - SE_multiqc_data.json expire_in: 7 days retry: - max: 1 + max: 0 when: - always @@ -519,7 +519,7 @@ integration_pe: - PE_multiqc_data.json expire_in: 7 days retry: - max: 1 + max: 0 when: - always @@ -533,7 +533,7 @@ override_inputBag: script: - hostname - ulimit -a - - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/staging/Replicate_Q-Y5F6.zip --ci true + - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/staging/Replicate_Q-Y5F6.zip --upload false --ci true - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \; artifacts: name: "$CI_JOB_NAME" @@ -542,7 +542,7 @@ override_inputBag: - inputBagOverride_PE_multiqc_data.json expire_in: 7 days retry: - max: 1 + max: 0 when: - always @@ -555,7 +555,7 @@ override_fastq: script: - hostname - ulimit -a - - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true + - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --ci true - find . 
-type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \; artifacts: name: "$CI_JOB_NAME" @@ -564,7 +564,7 @@ override_fastq: - fastqOverride_PE_multiqc_data.json expire_in: 7 days retry: - max: 1 + max: 0 when: - always @@ -577,7 +577,7 @@ override_species: script: - hostname - ulimit -a - - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --ci true + - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --upload false --ci true - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \; artifacts: name: "$CI_JOB_NAME" @@ -586,7 +586,7 @@ override_species: - speciesOverride_PE_multiqc_data.json expire_in: 7 days retry: - max: 1 + max: 0 when: - always diff --git a/CHANGELOG.md b/CHANGELOG.md index fe06f42c257925ff8110291afd8112117f7a6d56..97e74118d13b366d50c864570c4f319ff92701a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Upload execution run * Upload mRNA QC * Create and upload output bag +* Add option to not upload **Background** * Remove (comment out) option to pull references from S3 diff --git a/README.md b/README.md index 4baf67a6ce4626a309c5346fea705b51c722a3d4..c7002c709b2ab5b2781f8e1bc5f33cc8c1178b55 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,12 @@ To Run: * **dev** = [dev.gudmap.org](dev.gudmap.org) (default, does not contain all data) * **staging** = [staging.gudmap.org](staging.gudmap.org) (does not contain all data) * **production** = [www.gudmap.org](www.gudmap.org) (***does contain all data***) - * `--refMoVersion` mouse reference version ***(optional)*** - * `--refHuVersion` human reference version ***(optional)*** - * `--refERCCVersion` human reference version ***(optional)*** + * `--refMoVersion` mouse reference version 
***(optional, default = 38.p6.vM22)*** + * `--refHuVersion` human reference version ***(optional, default = 38.p12.v31)*** + * `--refERCCVersion` ERCC reference version ***(optional, default = 92)*** + * `--upload` whether to upload outputs back to the data-hub ***(optional, default = true)*** + * **true** = upload outputs to the data-hub + * **false** = do *NOT* upload outputs to the data-hub * `-profile` config profile to use ***(optional)***: * defaut = processes on BioHPC cluster * **biohpc** = process on BioHPC cluster diff --git a/workflow/conf/replicate_export_config.json b/workflow/conf/replicate_export_config.json index ff17fa513c5bc130a2e2bdaf9aa41b070c99b290..4380e46734a4425f7df57ad0cf0553a868b03c9d 100644 --- a/workflow/conf/replicate_export_config.json +++ b/workflow/conf/replicate_export_config.json @@ -1,6 +1,6 @@ { "bag": { - "bag_name": "Replicate_{rid}", + "bag_name": "{rid}_inputBag", "bag_algorithms": [ "md5" ], diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 1056f77e512ec6c21973e155363e4ad4b4101be6..ccedc85a24ee1856d4284d6b2f92595cde55b125 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -18,6 +18,7 @@ params.refMoVersion = "38.p6.vM22" params.refHuVersion = "38.p12.v31" params.refERCCVersion = "92" params.outDir = "${baseDir}/../output" +params.upload = true params.email = "" @@ -51,6 +52,7 @@ refHuVersion = params.refHuVersion refERCCVersion = params.refERCCVersion outDir = params.outDir logsDir = "${outDir}/Logs" +upload = params.upload inputBagForce = params.inputBagForce fastqsForce = params.fastqsForce speciesForce = params.speciesForce @@ -155,7 +157,7 @@ process getBag { path replicateExportConfig output: - path ("Replicate_*.zip") into bag + path ("*.zip") into bag when: inputBagForce == "" @@ -220,7 +222,7 @@ process getData { echo -e "LOG: linked" >> ${repRID}.getData.log # get bag basename - replicate=\$(basename "${inputBag}" | cut -d "." 
-f1) + replicate=\$(basename "${inputBag}" | cut -d "_" -f1) echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log # unzip bag @@ -1284,6 +1286,9 @@ process uploadInputBag { output: path ("inputBagRID.csv") into inputBagRID_fl + when: + upload + script: """ hostname > ${repRID}.uploadInputBag.log @@ -1350,6 +1355,9 @@ process uploadExecutionRun { output: path ("executionRunRID.csv") into executionRunRID_fl + when: + upload + script: """ hostname > ${repRID}.uploadExecutionRun.log @@ -1431,6 +1439,9 @@ process uploadQC { output: path ("qcRID.csv") into qcRID_fl + when: + upload + script: """ hostname > ${repRID}.uploadQC.log @@ -1469,7 +1480,6 @@ qcRID_fl.splitCsv(sep: ",", header: false).separate( qcRID ) - /* *ouputBag: create ouputBag */ @@ -1493,40 +1503,27 @@ process outputBag { output: path ("${repRID}_Output_Bag.zip") into outputBag + when: + upload + script: """ hostname > ${repRID}.outputBag.log ulimit -a >> ${repRID}.outputBag.log - mkdir -p ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/ - cp ${bam} ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/ - cp ${bigwig} ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/ - cp ${counts} ./deriva/Seq/Workflow_Runs/${studyRID}/${executionRunRID}/ + mkdir -p ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/ + cp ${bam} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/ + cp ${bai} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/ + cp ${bigwig} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/ + cp ${counts} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/ cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"') cookie=\${cookie:20:-1} - deriva-upload-cli --catalog 2 --token \${cookie} ${source} ./deriva --purge-state - - fileBam=\$(basename -a ${bam}) - md5Bam=\$(md5sum ./\${fileBam} | awk '{ print \$1 }') - fileBigwig=\$(basename -a ${bigwig}) - md5Bigwig=\$(md5sum ./\${fileBigwig} | awk '{ print \$1 }') - 
fileCounts=\$(basename -a ${counts}) - md5Counts=\$(md5sum ./\${fileCounts} | awk '{ print \$1 }') - urlBam=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/File_MD5=\${md5Bam}) - urlBam=\$(echo \${urlBam} | grep -o '\\"File_URL\\":\\".*\\",\\"File_Name') - urlBam=\${urlBam:12:-12} - urlBigwig=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/File_MD5=\${md5Bigwig}) - urlBigwig=\$(echo \${urlBigwig} | grep -o '\\"File_URL\\":\\".*\\",\\"File_Name') - urlBigwig=\${urlBigwig:12:-12} - urlCounts=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/File_MD5=\${md5Counts}) - urlCounts=\$(echo \${urlCounts} | grep -o '\\"File_URL\\":\\".*\\",\\"File_Name') - urlCounts=\${urlCounts:12:-12} - echo \${urlBam} > url.txt - echo \${urlBigwig} >> url.txt - echo \${urlCounts} >> url.txt + deriva-upload-cli --catalog 2 --token \${cookie} ${source} ./deriva + echo LOG: processed files uploaded >> ${repRID}.outputBag.log deriva-download-cli --catalog 2 --token \${cookie} ${source} ${executionRunExportConfig} . 
rid=${executionRunRID} + echo LOG: execution run bag downloaded >> ${repRID}.outputBag.log echo -e "### Run Details" >> runDetails.md echo -e "**Workflow URL:** https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq" >> runDetails.md @@ -1544,6 +1541,7 @@ process outputBag { echo -e "**Genome Assembly Version:** \${genome} patch \${patch}" >> runDetails.md echo -e "**Annotation Version:** GENCODE release \${annotation}" >> runDetails.md echo -e "**Run ID:** ${repRID}" >> runDetails.md + echo LOG: runDetails.md created >> ${repRID}.outputBag.log unzip Execution_Run_${executionRunRID}.zip mv Execution_Run_${executionRunRID} ${repRID}_Output_Bag @@ -1554,6 +1552,7 @@ process outputBag { cp ${multiqcJSON} \${loc} bdbag ./${repRID}_Output_Bag/ --update --archiver zip --debug + echo LOG: output bag created >> ${repRID}.outputBag.log """ } @@ -1572,6 +1571,9 @@ process uploadOutputBag { output: path ("outputBagRID.csv") into outputBagRID_fl + when: + upload + script: """ hostname > ${repRID}.uploadOutputBag.log diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh index 606b88397d5a6cf4feb4aa38d7615e3e3ba48735..1cab13098ce042e03a8a7506b551323f2c24c2e5 100644 --- a/workflow/scripts/bdbagFetch.sh +++ b/workflow/scripts/bdbagFetch.sh @@ -2,7 +2,7 @@ if [ -z "${3}" ] then - bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz ${1} + bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz ${1}_inputBag for i in $(find */ -name "*R*.fastq.gz") do path=${2}.$(echo ${i##*/} | grep -o "R[1,2].fastq.gz")