From 43935dd7622adb4b220ed5d5387d6ff2915617d2 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Sat, 18 Jan 2020 15:43:27 -0600 Subject: [PATCH] Make bdbag scripts an input --- workflow/rna-seq.nf | 6 +++++- workflow/scripts/modifyFetch.py | 25 ------------------------- workflow/scripts/splitFetch.py | 25 ------------------------- 3 files changed, 5 insertions(+), 51 deletions(-) delete mode 100644 workflow/scripts/modifyFetch.py delete mode 100644 workflow/scripts/splitFetch.py diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index ab53a41..55be645 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -23,6 +23,9 @@ logsDir = "${outDir}/Logs" // Define fixed files derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json") +// Define script files +script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh") + /* * getData: get bagit file from consortium */ @@ -57,6 +60,7 @@ process getData { publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.getData.err" input: + path script_bdbagFetch path cookies, stageAs: 'deriva-cookies.txt' from bdbag path bagit @@ -79,7 +83,7 @@ process getData { echo "LOG: \${replicate}" >>${repRID}.getData.err unzip ${bagit} 2>>${repRID}.getData.err echo "LOG: replicate bdbag unzipped" >>${repRID}.getData.err - sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err + sh bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err echo "LOG: replicate bdbag fetched" >>${repRID}.getData.err """ } diff --git a/workflow/scripts/modifyFetch.py b/workflow/scripts/modifyFetch.py deleted file mode 100644 index 82b1d4c..0000000 --- a/workflow/scripts/modifyFetch.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import pandas as pd -import re - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument('-f', '--fetchFile',help="The fetch file from bdgap.zip.",required=True) - args = parser.parse_args() - return args - -def main(): - args = get_args() - fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None) - fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0) - fileFile_filtered = fileFile[fileFile["File_Type"]=="FastQ"] - fetchFile_filtered = fetchFile[fetchFile[2].str[-9:]==".fastq.gz"] - fetchFile_filtered_renamed = fetchFile_filtered - for i in fileFile_filtered["File_Name"]: - fetchFile_filtered_renamed[2][fetchFile_filtered_renamed[2].str.contains(i,regex=False)] = fetchFile_filtered_renamed[2][fetchFile_filtered_renamed[2].str.contains(i,regex=False)].values[0].replace(re.sub("\.R.\.fastq\.gz","",i),fileFile_filtered["Replicate_RID"][fileFile_filtered["File_Name"]==i].values[0]) - fetchFile_filtered_renamed.to_csv(args.fetchFile+"/fetch.txt",sep="\t",header=False,index=False) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/workflow/scripts/splitFetch.py b/workflow/scripts/splitFetch.py deleted file mode 100644 index c8f6004..0000000 --- a/workflow/scripts/splitFetch.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import pandas as pd -import os - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument('-f', '--fetchFile',help="The fetch file from bdgap.zip.",required=True) - args = parser.parse_args() - return args - -def main(): - args = get_args() - fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None) - fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0) - replicateRID = fileFile.Replicate_RID.unique() - fetchArray = {i:fileFile.URI[(fileFile.Replicate_RID == i) & (fileFile.File_Type == "FastQ")] for i in replicateRID} - for i in replicateRID: - if not os.path.exists(i): - os.mkdir("Replicate_"+i) - fetchFile[fetchFile[0].str.contains('|'.join(fetchArray[i]))].to_csv("Replicate_"+i+"/fetch.txt",sep="\t",header=False,index=False) - -if __name__ == '__main__': - main() \ No newline at end of file -- GitLab