From 43935dd7622adb4b220ed5d5387d6ff2915617d2 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Sat, 18 Jan 2020 15:43:27 -0600
Subject: [PATCH] Make bdbagFetch.sh a getData input; remove fetch .py scripts

---
 workflow/rna-seq.nf             |  6 +++++-
 workflow/scripts/modifyFetch.py | 25 -------------------------
 workflow/scripts/splitFetch.py  | 25 -------------------------
 3 files changed, 5 insertions(+), 51 deletions(-)
 delete mode 100644 workflow/scripts/modifyFetch.py
 delete mode 100644 workflow/scripts/splitFetch.py

diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index ab53a41..55be645 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -23,6 +23,9 @@ logsDir = "${outDir}/Logs"
 // Define fixed files
 derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
 
+// Define script files
+script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
+
 /*
  * getData: get bagit file from consortium
  */
@@ -57,6 +60,7 @@ process getData {
   publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.getData.err"
 
   input:
+    path script_bdbagFetch
     path cookies, stageAs: 'deriva-cookies.txt' from bdbag
     path bagit
 
@@ -79,7 +83,7 @@ process getData {
     echo "LOG: \${replicate}" >>${repRID}.getData.err
     unzip ${bagit} 2>>${repRID}.getData.err
     echo "LOG: replicate bdbag unzipped" >>${repRID}.getData.err
-    sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err
+    sh bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err
     echo "LOG: replicate bdbag fetched" >>${repRID}.getData.err
     """
 }
diff --git a/workflow/scripts/modifyFetch.py b/workflow/scripts/modifyFetch.py
deleted file mode 100644
index 82b1d4c..0000000
--- a/workflow/scripts/modifyFetch.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import pandas as pd
-import re
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-f', '--fetchFile',help="The fetch file from bdgap.zip.",required=True)
-    args = parser.parse_args()
-    return args
-
-def main():
-    args = get_args()
-    fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
-    fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0)
-    fileFile_filtered = fileFile[fileFile["File_Type"]=="FastQ"]
-    fetchFile_filtered = fetchFile[fetchFile[2].str[-9:]==".fastq.gz"]
-    fetchFile_filtered_renamed = fetchFile_filtered
-    for i in fileFile_filtered["File_Name"]:
-        fetchFile_filtered_renamed[2][fetchFile_filtered_renamed[2].str.contains(i,regex=False)] = fetchFile_filtered_renamed[2][fetchFile_filtered_renamed[2].str.contains(i,regex=False)].values[0].replace(re.sub("\.R.\.fastq\.gz","",i),fileFile_filtered["Replicate_RID"][fileFile_filtered["File_Name"]==i].values[0])
-    fetchFile_filtered_renamed.to_csv(args.fetchFile+"/fetch.txt",sep="\t",header=False,index=False)
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/workflow/scripts/splitFetch.py b/workflow/scripts/splitFetch.py
deleted file mode 100644
index c8f6004..0000000
--- a/workflow/scripts/splitFetch.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import pandas as pd
-import os
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-f', '--fetchFile',help="The fetch file from bdgap.zip.",required=True)
-    args = parser.parse_args()
-    return args
-
-def main():
-    args = get_args()
-    fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
-    fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0)
-    replicateRID = fileFile.Replicate_RID.unique()
-    fetchArray = {i:fileFile.URI[(fileFile.Replicate_RID == i) & (fileFile.File_Type == "FastQ")] for i in replicateRID}
-    for i in replicateRID:
-        if not os.path.exists(i):
-            os.mkdir("Replicate_"+i)
-        fetchFile[fetchFile[0].str.contains('|'.join(fetchArray[i]))].to_csv("Replicate_"+i+"/fetch.txt",sep="\t",header=False,index=False)
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
-- 
GitLab