From b89416b523c42fe2fb154c0a2f28bbbe5aa3b265 Mon Sep 17 00:00:00 2001
From: s181706 <jonathan.gesell@utsouthwestern.edu>
Date: Mon, 28 Oct 2019 17:32:53 -0500
Subject: [PATCH] Temporary fix for file naming

---
 workflow/conf/biohpc.config     |  10 +--
 workflow/rna-seq.nf             | 112 ++++++++++++++++++--------------
 workflow/scripts/renameFastq.sh |  23 +++----
 3 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 5203ec8..d221fee 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -39,11 +39,7 @@ singularity {
 }
 
 env {
-  http_proxy = "http://proxy.swmed.edu:3128"
-  https_proxy = "http://proxy.swmed.edu:3128"
-  HTTP_PROXY = "http://proxy.swmed.edu:3128"
-  HTTPS_PROXY = "http://proxy.swmed.edu:3128"
-  all_proxy = "http://proxy.swmed.edu:3128"
-  ALL_PROXY = "http://proxy.swmed.edu:3128"
+  http_proxy = 'http://proxy.swmed.edu:3128'
+  https_proxy = 'http://proxy.swmed.edu:3128'
+  all_proxy = 'http://proxy.swmed.edu:3128'
 }
-
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index d55fb81..32d4077 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -7,9 +7,11 @@ params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
 params.outDir = "${baseDir}/../output"
 
 // Parse input variables
-deriva = Channel
-  .fromPath(params.deriva)
-  .ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" }
+deriva = file(params.deriva)  // cookie file, consumed by the `file deriva` process inputs
+if (!deriva.exists()) {  // keep the fail-fast check the Channel-based version had
+  exit 1, "deriva cookie file not found: ${params.deriva}"
+}
+deriva.copyTo("${System.getProperty('user.home')}/.bdbag/deriva-cookies.txt")  // '~' is a shell expansion, not a Groovy one
 bdbag = Channel
   .fromPath(params.bdbag)
   .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
@@ -20,62 +22,72 @@ outDir = params.outDir
  * splitData: split bdbag files by replicate so fetch can occure in parallel
  */
 process splitData {
-    tag "${bdbag.baseName}"
-    publishDir "${outDir}/temp/${task.process}", mode: "symlink"
+  tag "${bdbag.baseName}"
+  publishDir "${outDir}/temp/${task.process}", mode: "symlink"
 
-    input:
-        file bdbag
+  input:
+    file bdbag
+    file deriva
 
-    output:
-        file("Replicate_*.zip") into bdbagSplit mode flatten
-        file("${bdbag.baseName}/data/File.csv") into fileMeta
-        file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
-        file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
+  output:
+    file("Replicate_*.zip") into bdbagSplit mode flatten
+    file("${bdbag.baseName}/data/File.csv") into fileMeta
+    file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
+    file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
 
-    script:
-        """
-        hostname
-        ulimit -a
-        study=\$(echo "${bdbag}" | cut -d'.' -f1)
-        echo LOG: \${study}
-        unzip ${bdbag}
-        echo LOG: bdgag unzipped
-        python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
-        echo LOG: fetch file filtered for only .fastq.gz
-        python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
-        echo LOG: fetch file split by replicates
-        sh ${baseDir}/scripts/splitBag.sh \${study}
-        echo LOG: bag recreated with replicate split fetch file
-        """
+  script:
+    """
+    hostname
+    ulimit -a
+    ln -sf "\$(readlink -e "${deriva}")" ~/.bdbag/deriva-cookies.txt  # quoted: an empty readlink result must fail, not link into the work dir
+    study=\$(echo "${bdbag}" | cut -d'.' -f1)
+    echo LOG: \${study}
+    unzip ${bdbag}
+    echo LOG: bdgag unzipped
+    python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
+    cd \${study}
+    bash ${baseDir}/scripts/fixFetch.sh
+    cd ..
+    echo LOG: fetch file filtered for only .fastq.gz
+    python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
+    echo LOG: fetch file split by replicates
+    sh ${baseDir}/scripts/splitBag.sh \${study}
+    echo LOG: bag recreated with replicate split fetch file
+    """
 }
 
+println "http_proxy: ${System.getenv('http_proxy')}"  // debug: proxy as seen by the launching process (null when unset)
+println "https_proxy: ${System.getenv('https_proxy')}"  // debug: proxy as seen by the launching process (null when unset)
+
 /*
  * getData: fetch study files from consortium with downloaded bdbag.zip
  */
 process getData {
-    tag "${rep.baseName}"
-    publishDir "${outDir}/temp/${task.process}", mode: "symlink"
+  tag "${rep.baseName}"
+  publishDir "${outDir}/temp/${task.process}", mode: "symlink"
 
-    input:
-        file deriva
-        each rep from bdbagSplit
+  input:
+    file deriva
+    each rep from bdbagSplit
 
-    output:
-        file("**/*.R*.fastq.gz") into fastq
+  output:
+    file("**/*.R*.fastq.gz") into fastq
 
-    script:
-        """
-        hostname
-        ulimit -a
-        replicate=\$(echo "${rep}" | cut -d'.' -f1)
-        echo LOG: \${replicate}
-        cp "${deriva}" ~/.bdbag/deriva-cookies.txt
-        echo LOG: deriva cookie loaded
-        unzip ${rep}
-        echo LOG: replicate bdbag unzipped
-        sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
-        echo LOG: replicate bdbag fetched
-        sh ${baseDir}/scripts/renameFastq.sh \${replicate}
-        echo LOG: fastq.gz files renamed to replicate RID
-        """
- }
\ No newline at end of file
+  script:
+    """
+    hostname
+    ulimit -a
+    echo LOG:\${http_proxy}
+    export https_proxy=\${http_proxy}
+    ln -sf "\$(readlink -e "${deriva}")" ~/.bdbag/deriva-cookies.txt  # quoted: an empty readlink result must fail, not link into the work dir
+    replicate=\$(echo "${rep}" | cut -d'.' -f1 | rev | cut -f1 -d '/' | rev)
+    echo LOG: \${replicate}
+    echo LOG: deriva cookie loaded
+    unzip ${rep}
+    echo LOG: replicate bdbag unzipped
+    sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
+    echo LOG: replicate bdbag fetched
+    sh ${baseDir}/scripts/renameFastq.sh \${replicate}
+    echo LOG: fastq.gz files renamed to replicate RID
+    """
+ }
diff --git a/workflow/scripts/renameFastq.sh b/workflow/scripts/renameFastq.sh
index f559376..c57eccb 100644
--- a/workflow/scripts/renameFastq.sh
+++ b/workflow/scripts/renameFastq.sh
@@ -1,15 +1,12 @@
 #!/bin
 
-while read loc checksum fileLocation
-do
-    file=$(echo ${fileLocation##*/})
-    fileName=$(echo ${file%.R*.fastq.gz})
-    fileExt=$(echo ${file##${fileName}.})
-    while IFS="," read RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID
-    do
-        if [ ${file} == ${File_Name} ]
-        then
-            find . -type f -name ${file} -execdir mv {} ${Replicate_RID}.${fileExt} ';'
-        fi
-    done < $1/data/File.csv
-done < $1/fetch.txt
\ No newline at end of file
+while read -r loc checksum fileLocation; do  # one fetch.txt entry per line; only the third field is used
+  file="${fileLocation##*/}"  # basename of the fetched path
+  fileName="${file%.R*.fastq.gz}"  # name with the .R*.fastq.gz suffix removed
+  fileExt="${file##"${fileName}".}"  # the removed suffix, without its leading dot
+  while IFS="," read -r RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID; do
+    if [ "${file}" = "${File_Name}" ]; then  # '=' is the POSIX string test; this script is run with sh
+      find . -type f -name "${file}" -execdir mv {} "${Replicate_RID}.${fileExt}" ';'  # rename in place to <Replicate_RID>.<suffix>
+    fi
+  done < "$1/data/File.csv"
+done < "$1/fetch.txt"
-- 
GitLab