From c2b05eaa2ea81020c64868862fff6a738b691d1c Mon Sep 17 00:00:00 2001
From: s181706 <jonathan.gesell@utsouthwestern.edu>
Date: Wed, 30 Oct 2019 17:10:29 -0500
Subject: [PATCH] Updated for concise output files

---
 workflow/rna-seq.nf            | 38 +++++++++++++++-------------------
 workflow/scripts/bdbagFetch.sh |  7 +++++--
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index cb0ca67..4f1fd5f 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -1,13 +1,13 @@
 #!/usr/bin/env nextflow
 
 // Define input variables
-params.deriva = "${baseDir}/../test_data/deriva-cookies.txt"
+params.deriva = "/project/BICF/BICF_Core/shared/gudmap/cookies/deriva-cookies.txt"
 params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
 
 params.outDir = "${baseDir}/../output"
 
 // Parse input variables
-deriva = file(params.deriva)
+deriva = file(params.deriva, checkIfExists: true) // boolean flag, not the string 'true'
 bdbag = Channel
   .fromPath(params.bdbag)
   .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
@@ -19,10 +19,10 @@ outDir = params.outDir
  */
 process splitData {
   tag "${bdbag.baseName}"
-  publishDir "${outDir}/temp/${task.process}", mode: "symlink"
 
   input:
     file bdbag
+    path cookies, stageAs: 'cookies.txt' from deriva
 
   output:
     file("Replicate_*.zip") into bdbagSplit mode flatten
@@ -34,48 +34,44 @@ process splitData {
     """
     hostname
     ulimit -a
-    ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
-    echo LOG: deriva cookie linked
-    study=`echo "${bdbag}" | cut -d'.' -f1`
-    echo LOG: \${study}
+    ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt
+    echo "LOG: deriva cookie linked"
+    study=`echo "${bdbag}" | cut -d '.' -f1`
+    echo "LOG: \${study}"
     unzip ${bdbag}
-    echo LOG: bdgag unzipped
+    echo "LOG: bdgag unzipped"
     python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
-    echo LOG: fetch file filtered for only .fastq.gz
+    echo "LOG: fetch file filtered for only .fastq.gz"
     python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
-    echo LOG: fetch file split by replicates
+    echo "LOG: fetch file split by replicates"
     sh ${baseDir}/scripts/splitBag.sh \${study}
-    echo LOG: bag recreated with replicate split fetch file
+    echo "LOG: bag recreated with replicate split fetch file"
     """
 }
 
-println {${http_proxy}}
-println {${https_proxy}}
-
 /*
  * getData: fetch study files from consortium with downloaded bdbag.zip
  */
 process getData {
   tag "${rep.baseName}"
-  publishDir "${outDir}/temp/${task.process}", mode: "symlink"
+  publishDir "${outDir}/tempOut/fastqs", mode: "symlink"
 
   input:
     each rep from bdbagSplit
 
   output:
-    file("**/*.R*.fastq.gz") into fastq
+    path ("*.R*.fastq.gz", type: 'file', maxDepth: 0) into fastq // maxDepth given as an integer literal, not a quoted string
 
   script:
     """
     hostname
     ulimit -a
-    echo LOG:\${http_proxy}
     export https_proxy=\${http_proxy}
-    replicate=\$(echo "${rep}" | cut -d'.' -f1 | rev | cut -f1 -d '/' | rev)
-    echo LOG: \${replicate}
+    replicate=\$(basename "${rep}" | cut -d '.' -f1)
+    echo "LOG: \${replicate}"
     unzip ${rep}
-    echo LOG: replicate bdbag unzipped
+    echo "LOG: replicate bdbag unzipped"
     sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
-    echo LOG: replicate bdbag fetched
+    echo "LOG: replicate bdbag fetched"
     """
  }
diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh
index 28dab3f..9af4eb4 100644
--- a/workflow/scripts/bdbagFetch.sh
+++ b/workflow/scripts/bdbagFetch.sh
@@ -1,3 +1,6 @@
-#!/bin
+#!/bin/bash
 
-bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz $1
\ No newline at end of file
+bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz $1 &&
+for i in $(find */ -name "*.R*.fastq.gz"); do
+  mv ${i} .;
+done;
-- 
GitLab