From c35fd5ba27a0a3cd1c89bdf3e93678547b76229d Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Fri, 16 Jul 2021 11:04:00 -0500
Subject: [PATCH] Reimplemented symbolic link of bdbag cookie.

---
 CHANGELOG.md | 1 +
 README.md | 8 +++++++-
 rna-seq.nf | 43 +++++++++++++++++++++++++------------------
 3 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04aeb94..49a709c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 **Background**
 * Corrected file search parameters due to name inconsistency (#129)
+* Re-implemented symlink of the deriva cookie into ~/.bdbag/ for bdbag fetch to use (#132)
 # v2.0.0
 **User Facing**
diff --git a/README.md b/README.md
index 7692cb7..1c505f9 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,13 @@ This pipeline was created to be a standard mRNA-sequencing analysis pipeline whi
 Authentication:
 ----------------
-The consortium server used must be authentificated with the [deriva authentication client](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/), and remain authentificated till the end of the pipeline run. Prematurely closing the client will result in invalidation of the tokens, and may result in the pipeline failure. The use of long-lived "globus" tokens is on the roadmap for use in the future.
+The consortium server used must be authenticated with the [deriva authentication client](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/) and remain authenticated until the end of the pipeline run. Prematurely closing the client will invalidate the tokens and may cause the pipeline to fail. The use of long-lived "globus" tokens is on the roadmap for the future. If you are using [v2.0.0](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/tags/v2.0.0), you will be required to create a link for the bdbag cookie.
Below is the code to run: + +```sh +mkdir -p ~/.bdbag +ln -sf cookies.txt ~/.bdbag/deriva-cookies.txt +``` + To Run: ------- diff --git a/rna-seq.nf b/rna-seq.nf index 159805d..3b950ec 100644 --- a/rna-seq.nf +++ b/rna-seq.nf @@ -220,7 +220,7 @@ process getBag { """ } -// Set inputBag to downloaded or forced input and replicate them for multiple process inputs +// Set inputBag to downloaded or forced input and replicate them for multiple process inputs if (inputBagForce != "") { inputBag = Channel .fromPath(inputBagForce) @@ -257,6 +257,13 @@ process getData { hostname > ${repRID}.getData.log ulimit -a >> ${repRID}.getData.log + # link deriva cookie for authentication + echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log + mkdir -p ~/.bdbag + ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt + echo -e "LOG: linked" >> ${repRID}.getData.log + + # get bag basename replicate=\$(basename "${inputBag}") echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log @@ -723,7 +730,7 @@ process seqwho { # get seqwho index wget -O SeqWho.ix https://cloud.biohpc.swmed.edu/index.php/s/eeNWqZz8jqN5zWY/download echo -e "LOG: seqwho index downloaded" >> ${repRID}.seqwho.log - + # run seqwho seqwho.py -f *.fastq.gz -x SeqWho.ix echo -e "LOG: seqwho ran" >> ${repRID}.seqwho.log @@ -936,7 +943,7 @@ process seqwho { then echo -e "Read 2\t\${seqtypeR2}\t\${speciesR2}\t\${seqtypeConfidenceR2}\t\${consensus}\t\${speciesConfidenceR2}" >> seqwhoInfer.tsv fi - + # save species file echo "\${speciesInfer}" > inferSpecies.csv @@ -1581,7 +1588,7 @@ strandedInfer.into { strandedInfer_failExecutionRun } -/* +/* * checkMetadata: checks the submitted metadata against inferred */ process checkMetadata { @@ -2192,7 +2199,7 @@ process aggrQC { """ } -/* +/* * uploadInputBag: uploads the input bag */ process uploadInputBag { @@ -2230,7 +2237,7 @@ process uploadInputBag { echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log size=\$(wc -c < ./\${file}) echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log - + exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5}) if [ "\${exist}" == "[]" ] then @@ -2265,7 +2272,7 @@ inputBagRID.into { inputBagRID_failExecutionRun } -/* +/* * uploadExecutionRun: uploads the execution run */ process uploadExecutionRun { @@ -2283,7 +2290,7 @@ process uploadExecutionRun { val seqtypeError from seqtypeError_uploadExecutionRun val speciesErrorSeqwho from speciesErrorSeqwho_uploadExecutionRun val speciesError from speciesError_uploadExecutionRun - + output: path ("executionRunRID.csv") into executionRunRID_fl @@ -2370,7 +2377,7 @@ executionRunRID.into { executionRunRID_fail } -/* +/* * uploadQC: uploads the mRNA QC */ process uploadQC { @@ -2480,7 +2487,7 @@ process uploadProcessedFile { script: """ - + hostname > ${repRID}.uploadProcessedFile.log ulimit -a >> ${repRID}.uploadProcessedFile.log @@ -2550,7 +2557,7 @@ process uploadProcessedFile { """ } -/* +/* * uploadOutputBag: uploads the output bag */ process uploadOutputBag { @@ -2597,7 +2604,7 @@ process uploadOutputBag { echo LOG: ${repRID} output bag md5 sum - \${md5} >> ${repRID}.uploadOutputBag.log size=\$(wc -c < ./\${file}) echo LOG: ${repRID} output bag size - \${size} bytes >> ${repRID}.uploadOutputBag.log - + loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/output_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents) echo LOG: output bag uploaded - 
\${loc} >> ${repRID}.uploadOutputBag.log # url-ify the location @@ -2632,7 +2639,7 @@ outputBagRID_fl.splitCsv(sep: ",", header: false).separate( outputBagRID ) -/* +/* * finalizeExecutionRun: finalizes the execution run */ process finalizeExecutionRun { @@ -2698,7 +2705,7 @@ errorDetails.into { } -/* +/* * failPreExecutionRun: fail the execution run prematurely for fastq errors */ process failPreExecutionRun { @@ -2798,7 +2805,7 @@ executionRunRID_preFail_fl.splitCsv(sep: ",", header: false).separate( executionRunRID_preFail ) -/* +/* * failPreExecutionRun_seqwho: fail the execution run prematurely for seqwho errors */ process failPreExecutionRun_seqwho { @@ -2902,7 +2909,7 @@ executionRunRID_preFailseqwho_fl.splitCsv(sep: ",", header: false).separate( failExecutionRunRID = executionRunRID_fail.ifEmpty('').mix(executionRunRID_preFail.ifEmpty('').mix(executionRunRID_preFailseqwho.ifEmpty(''))).filter { it != "" } -/* +/* * failExecutionRun: fail the execution run */ process failExecutionRun { @@ -2982,7 +2989,7 @@ process failExecutionRun { rid=\$(python3 ${script_uploadExecutionRun_failExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${pipelineError_details}" -o ${source} -c \${cookie} -u ${executionRunRID}) echo LOG: execution run RID marked as error - \${rid} >> ${repRID}.failExecutionRun.log fi - + if [ ${params.track} == true ] then dt=`date +%FT%T.%3N%:z` @@ -2997,7 +3004,7 @@ process failExecutionRun { """ } -/* +/* * uploadQC_fail: uploads the mRNA QC on failed execution run */ process uploadQC_fail { -- GitLab
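Note: the README snippet above stores a relative symlink target, so `~/.bdbag/deriva-cookies.txt -> cookies.txt` only resolves if `cookies.txt` itself sits in `~/.bdbag/`. Below is a minimal sketch of a more defensive setup, assuming the deriva cookie file is a `cookies.txt` in the current working directory (adjust the path to wherever the deriva authentication client actually wrote it); it mirrors the `readlink -e` approach this patch adds inside the `getData` process.

```sh
# Sketch only: 'cookies.txt' is an assumed location for the deriva cookie file; adjust as needed.
# Link the cookie into ~/.bdbag/ with an absolute target, then verify the link resolves
# before starting the pipeline.
mkdir -p ~/.bdbag
ln -sf "$(readlink -f cookies.txt)" ~/.bdbag/deriva-cookies.txt  # absolute target avoids a dangling link
readlink -e ~/.bdbag/deriva-cookies.txt \
  || echo "deriva cookie link is dangling; check the path to cookies.txt"
```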