From c35fd5ba27a0a3cd1c89bdf3e93678547b76229d Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Fri, 16 Jul 2021 11:04:00 -0500
Subject: [PATCH] Reimplemented symbolic link of bdbag cookie.

---
 CHANGELOG.md | 1 +
 README.md | 8 +++++++-
 rna-seq.nf | 43 +++++++++++++++++++++++++------------------
 3 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04aeb94..49a709c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 **Background**
 * Corrected file search parameters due to name inconsistency (#129)
+* Re-implemented symlink of the deriva cookie into ~/.bdbag/ for bdbag fetch to use (#132)
 # v2.0.0
 **User Facing**
diff --git a/README.md b/README.md
index 7692cb7..1c505f9 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,13 @@ This pipeline was created to be a standard mRNA-sequencing analysis pipeline whi
 Authentication:
 ----------------
-The consortium server used must be authentificated with the [deriva authentication client](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/), and remain authentificated till the end of the pipeline run. Prematurely closing the client will result in invalidation of the tokens, and may result in the pipeline failure. The use of long-lived "globus" tokens is on the roadmap for use in the future.
+The consortium server used must be authenticated with the [deriva authentication client](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/) and remain authenticated until the end of the pipeline run. Prematurely closing the client will invalidate the tokens and may cause the pipeline to fail. The use of long-lived "globus" tokens is on the roadmap for the future. If you are using [v2.0.0](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/tags/v2.0.0), you will be required to create a link for the bdbag cookie.
Below is the code to run: + +```sh +mkdir -p ~/.bdbag +ln -sf cookies.txt ~/.bdbag/deriva-cookies.txt +``` + To Run: ------- diff --git a/rna-seq.nf b/rna-seq.nf index 159805d..3b950ec 100644 --- a/rna-seq.nf +++ b/rna-seq.nf @@ -220,7 +220,7 @@ process getBag { """ } -// Set inputBag to downloaded or forced input and replicate them for multiple process inputs +// Set inputBag to downloaded or forced input and replicate them for multiple process inputs if (inputBagForce != "") { inputBag = Channel .fromPath(inputBagForce) @@ -257,6 +257,13 @@ process getData { hostname > ${repRID}.getData.log ulimit -a >> ${repRID}.getData.log + # link deriva cookie for authentication + echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log + mkdir -p ~/.bdbag + ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt + echo -e "LOG: linked" >> ${repRID}.getData.log + + # get bag basename replicate=\$(basename "${inputBag}") echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log @@ -723,7 +730,7 @@ process seqwho { # get seqwho index wget -O SeqWho.ix https://cloud.biohpc.swmed.edu/index.php/s/eeNWqZz8jqN5zWY/download echo -e "LOG: seqwho index downloaded" >> ${repRID}.seqwho.log - + # run seqwho seqwho.py -f *.fastq.gz -x SeqWho.ix echo -e "LOG: seqwho ran" >> ${repRID}.seqwho.log @@ -936,7 +943,7 @@ process seqwho { then echo -e "Read 2\t\${seqtypeR2}\t\${speciesR2}\t\${seqtypeConfidenceR2}\t\${consensus}\t\${speciesConfidenceR2}" >> seqwhoInfer.tsv fi - + # save species file echo "\${speciesInfer}" > inferSpecies.csv @@ -1581,7 +1588,7 @@ strandedInfer.into { strandedInfer_failExecutionRun } -/* +/* * checkMetadata: checks the submitted metadata against inferred */ process checkMetadata { @@ -2192,7 +2199,7 @@ process aggrQC { """ } -/* +/* * uploadInputBag: uploads the input bag */ process uploadInputBag { @@ -2230,7 +2237,7 @@ process uploadInputBag { echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log size=\$(wc -c < ./\${file}) echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log - + exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5}) if [ "\${exist}" == "[]" ] then @@ -2265,7 +2272,7 @@ inputBagRID.into { inputBagRID_failExecutionRun } -/* +/* * uploadExecutionRun: uploads the execution run */ process uploadExecutionRun { @@ -2283,7 +2290,7 @@ process uploadExecutionRun { val seqtypeError from seqtypeError_uploadExecutionRun val speciesErrorSeqwho from speciesErrorSeqwho_uploadExecutionRun val speciesError from speciesError_uploadExecutionRun - + output: path ("executionRunRID.csv") into executionRunRID_fl @@ -2370,7 +2377,7 @@ executionRunRID.into { executionRunRID_fail } -/* +/* * uploadQC: uploads the mRNA QC */ process uploadQC { @@ -2480,7 +2487,7 @@ process uploadProcessedFile { script: """ - + hostname > ${repRID}.uploadProcessedFile.log ulimit -a >> ${repRID}.uploadProcessedFile.log @@ -2550,7 +2557,7 @@ process uploadProcessedFile { """ } -/* +/* * uploadOutputBag: uploads the output bag */ process uploadOutputBag { @@ -2597,7 +2604,7 @@ process uploadOutputBag { echo LOG: ${repRID} output bag md5 sum - \${md5} >> ${repRID}.uploadOutputBag.log size=\$(wc -c < ./\${file}) echo LOG: ${repRID} output bag size - \${size} bytes >> ${repRID}.uploadOutputBag.log - + loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/output_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents) echo LOG: output bag uploaded - 
\${loc} >> ${repRID}.uploadOutputBag.log # url-ify the location @@ -2632,7 +2639,7 @@ outputBagRID_fl.splitCsv(sep: ",", header: false).separate( outputBagRID ) -/* +/* * finalizeExecutionRun: finalizes the execution run */ process finalizeExecutionRun { @@ -2698,7 +2705,7 @@ errorDetails.into { } -/* +/* * failPreExecutionRun: fail the execution run prematurely for fastq errors */ process failPreExecutionRun { @@ -2798,7 +2805,7 @@ executionRunRID_preFail_fl.splitCsv(sep: ",", header: false).separate( executionRunRID_preFail ) -/* +/* * failPreExecutionRun_seqwho: fail the execution run prematurely for seqwho errors */ process failPreExecutionRun_seqwho { @@ -2902,7 +2909,7 @@ executionRunRID_preFailseqwho_fl.splitCsv(sep: ",", header: false).separate( failExecutionRunRID = executionRunRID_fail.ifEmpty('').mix(executionRunRID_preFail.ifEmpty('').mix(executionRunRID_preFailseqwho.ifEmpty(''))).filter { it != "" } -/* +/* * failExecutionRun: fail the execution run */ process failExecutionRun { @@ -2982,7 +2989,7 @@ process failExecutionRun { rid=\$(python3 ${script_uploadExecutionRun_failExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${pipelineError_details}" -o ${source} -c \${cookie} -u ${executionRunRID}) echo LOG: execution run RID marked as error - \${rid} >> ${repRID}.failExecutionRun.log fi - + if [ ${params.track} == true ] then dt=`date +%FT%T.%3N%:z` @@ -2997,7 +3004,7 @@ process failExecutionRun { """ } -/* +/* * uploadQC_fail: uploads the mRNA QC on failed execution run */ process uploadQC_fail { -- GitLab
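Note: the README snippet above stores a relative symlink target, so `~/.bdbag/deriva-cookies.txt -> cookies.txt` only resolves if `cookies.txt` itself sits in `~/.bdbag/`. Below is a minimal sketch of a more defensive setup, assuming the deriva cookie file is a `cookies.txt` in the current working directory (adjust the path to wherever the deriva authentication client actually wrote it); it mirrors the `readlink -e` approach this patch adds inside the `getData` process.

```sh
# Sketch only: 'cookies.txt' is an assumed location for the deriva cookie file; adjust as needed.
# Link the cookie into ~/.bdbag/ with an absolute target, then verify the link resolves
# before starting the pipeline.
mkdir -p ~/.bdbag
ln -sf "$(readlink -f cookies.txt)" ~/.bdbag/deriva-cookies.txt  # absolute target avoids a dangling link
readlink -e ~/.bdbag/deriva-cookies.txt \
  || echo "deriva cookie link is dangling; check the path to cookies.txt"
```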