From ed35a88e4d8f476dd779798d4fa3cf9d91be83b0 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Fri, 11 Oct 2019 21:26:41 -0500
Subject: [PATCH] Working version of getData process

---
 workflow/rna-seq.nf             | 17 ++++++++++++++---
 workflow/scripts/bdbagFetch.sh  |  3 +++
 workflow/scripts/modifyFetch.sh |  3 ---
 workflow/scripts/renameFastq.sh | 15 +++++++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)
 create mode 100644 workflow/scripts/bdbagFetch.sh
 delete mode 100644 workflow/scripts/modifyFetch.sh
 create mode 100644 workflow/scripts/renameFastq.sh

diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index b272392..d839044 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -25,14 +25,25 @@ outDir = params.outDir
         file bdbag
 
     output:
-        file("*") into dataPaths
+        file("**/*.R*.fastq.gz") into fastqPaths
+        file("**/File.csv") into filePaths
+        file("**/Experiment Settings.csv") into experimentSettingsPaths
+        file("**/Experiment.csv") into experimentPaths
 
     script:
         """
         hostname
         ulimit -a
+        study=\$(echo "${bdbag}" | cut -d'.' -f1)
+        echo LOG: \${study}
         unzip ${bdbag}
-        python3 ${baseDir}/scripts/modifyFetch.py -f \$(echo "${bdbag}" | cut -d'.' -f1)
-        bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
+        echo LOG: bdgag unzipped
+        python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
+        echo LOG: fetch file filtered for only .fastq.gz
+        #bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
+        sh ${baseDir}/scripts/bdbagFetch.sh \${study}
+        echo LOG: bdbag fetched
+        sh ${baseDir}/scripts/renameFastq.sh \${study}
+        echo LOG: fastq.gz files renamed to replicate RID
         """
  }
\ No newline at end of file
diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh
new file mode 100644
index 0000000..28dab3f
--- /dev/null
+++ b/workflow/scripts/bdbagFetch.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Usage: bdbagFetch.sh <bag-dir> -- fetch the bag's remote files, filtered to filenames matching the fastq.gz filter.
+bdbag --resolve-fetch all --fetch-filter 'filename$*fastq.gz' "$1"
\ No newline at end of file
diff --git a/workflow/scripts/modifyFetch.sh b/workflow/scripts/modifyFetch.sh
deleted file mode 100644
index f243f5c..0000000
--- a/workflow/scripts/modifyFetch.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin
-
-unzip $1
\ No newline at end of file
diff --git a/workflow/scripts/renameFastq.sh b/workflow/scripts/renameFastq.sh
new file mode 100644
index 0000000..f559376
--- /dev/null
+++ b/workflow/scripts/renameFastq.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Usage: renameFastq.sh <bag-dir> -- rename each file listed in <bag-dir>/fetch.txt to <Replicate_RID>.<suffix> via data/File.csv.
+while read -r loc checksum fileLocation
+do
+    file=${fileLocation##*/}           # base name of the fetched file
+    fileName=${file%.R*.fastq.gz}      # base name without the .R*.fastq.gz tail
+    fileExt=${file##"${fileName}".}    # the tail that was stripped above, kept for the new name
+    while IFS="," read -r RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID
+    do  # NOTE(review): plain comma split; a quoted CSV field containing a comma would shift columns -- confirm File.csv never has one
+        if [ "${file}" = "${File_Name}" ]  # POSIX '=' with quoted operands: script is run via sh, and fields may be empty
+        then
+            find . -type f -name "${file}" -execdir mv {} "${Replicate_RID}.${fileExt}" ';'
+        fi
+    done < "$1/data/File.csv"
+done < "$1/fetch.txt"
\ No newline at end of file
-- 
GitLab