diff --git a/.gitignore b/.gitignore
index 2bc34493af5ad8d30a9ef477283aa7c4b32700b8..8b4b1eadf6253fc94cefe75b485a051ed8f3d71e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -297,7 +297,6 @@ $RECYCLE.BIN/
 
 # nextflow analysis folders/files
 /test_data/*
-/workflow/docker/images/*
 /workflow/.nextflow/*
 /workflow/work/*
 /workflow/output/*
diff --git a/docs/GUDMAP.RBK Pipeline.docx b/docs/GUDMAP.RBK Pipeline.docx
index deae8a8fbfb7adc32ba2fba03a25eca6af57b4d7..7230d3575b627acad8496bb24aabfc9366507a77 100755
Binary files a/docs/GUDMAP.RBK Pipeline.docx and b/docs/GUDMAP.RBK Pipeline.docx differ
diff --git a/nextflow.config b/nextflow.config
deleted file mode 100644
index 28777047bfa85b13d08a0df02a22e6eac6d66540..0000000000000000000000000000000000000000
--- a/nextflow.config
+++ /dev/null
@@ -1,5 +0,0 @@
-profiles {
-  standard {
-    includeConfig 'workflow/conf/biohpc.config'
-  }
-}
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 10bb43a6cc67fdf1942b582e908ace5ed4877c36..5203ec8faf03460601a3cfa4340c840ada365431 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -3,6 +3,9 @@ process {
   queue = 'super'
 
   // Process specific configuration
+  withName:splitData {
+    container = 'docker://bicf/bdbag:1.0'
+  }
   withName:getData {
     container = 'docker://bicf/bdbag:1.0'
   }
diff --git a/workflow/docker/.gitkeep b/workflow/docker/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/workflow/docker/getData b/workflow/docker/getData
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/workflow/docker/images/.gitkeep b/workflow/docker/images/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/workflow/docker/temp b/workflow/docker/temp
deleted file mode 100644
index f7dcb3af08981d465bf0838d09de1b38e9e0c5aa..0000000000000000000000000000000000000000
--- a/workflow/docker/temp
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-RUN apt-get install -y python3.7 python3-pip
-
-RUN wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-  bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b && \
-  rm Miniconda3-latest-Linux-x86_64.sh
-ENV PATH=/miniconda/bin:${PATH}                                                                                      
-RUN conda config --add channels defaults && \
-  conda config --add channels bioconda && \
-  conda config --add channels conda-forge && \
-  conda update -n base -c defaults -y conda
-
-RUN pip install --upgrade pip
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 86e537b648edd8691f1e0c5bff59066b751a2187..d55fb81ac18df0bc4b6ca3d2819cb83f929a075b 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -1,34 +1,36 @@
 #!/usr/bin/env nextflow
 
 // Define input variables
+params.deriva = "${baseDir}/../test_data/deriva-cookies.txt"
 params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
 
 params.outDir = "${baseDir}/../output"
 
 // Parse input variables
+deriva = Channel
+  .fromPath(params.deriva)
+  .ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" }
 bdbag = Channel
   .fromPath(params.bdbag)
   .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
 
 outDir = params.outDir
 
-
 /*
- * getData: fetch study files from consortium with downloaded bdbag.zip
- * python must be loaded prior to nextflow run, because conda env create from .yml doesn't work with nextflow loaded module (either process in-line, or config file)
+ * splitData: split bdbag files by replicate so fetch can occur in parallel
  */
- process getData {
-     publishDir "${outDir}/temp/getData", mode: "symlink"
-//     conda "${baseDir}/conf/conda.env.bdbag.yml"
+process splitData {
+    tag "${bdbag.baseName}"
+    publishDir "${outDir}/temp/${task.process}", mode: "symlink"
 
-     input:
+    input:
         file bdbag
 
     output:
-        file("**/*.R*.fastq.gz") into fastqPaths
-        file("**/File.csv") into filePaths
-        file("**/Experiment Settings.csv") into experimentSettingsPaths
-        file("**/Experiment.csv") into experimentPaths
+        file("Replicate_*.zip") into bdbagSplit mode flatten
+        file("${bdbag.baseName}/data/File.csv") into fileMeta
+        file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
+        file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
 
     script:
         """
@@ -40,10 +42,40 @@ outDir = params.outDir
         echo LOG: bdgag unzipped
         python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
         echo LOG: fetch file filtered for only .fastq.gz
-        #bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
-        sh ${baseDir}/scripts/bdbagFetch.sh \${study}
-        echo LOG: bdbag fetched
-        sh ${baseDir}/scripts/renameFastq.sh \${study}
+        python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
+        echo LOG: fetch file split by replicates
+        sh ${baseDir}/scripts/splitBag.sh \${study}
+        echo LOG: bag recreated with replicate split fetch file
+        """
+}
+
+/*
+ * getData: fetch study files from consortium with downloaded bdbag.zip
+ */
+process getData {
+    tag "${rep.baseName}"
+    publishDir "${outDir}/temp/${task.process}", mode: "symlink"
+
+    input:
+        file deriva
+        each rep from bdbagSplit
+
+    output:
+        file("**/*.R*.fastq.gz") into fastq
+
+    script:
+        """
+        hostname
+        ulimit -a
+        replicate=\$(echo "${rep}" | cut -d'.' -f1)
+        echo LOG: \${replicate}
+        cp "${deriva}" ~/.bdbag/deriva-cookies.txt
+        echo LOG: deriva cookie loaded
+        unzip ${rep}
+        echo LOG: replicate bdbag unzipped
+        sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
+        echo LOG: replicate bdbag fetched
+        sh ${baseDir}/scripts/renameFastq.sh \${replicate}
         echo LOG: fastq.gz files renamed to replicate RID
         """
- }
+ }
\ No newline at end of file
diff --git a/workflow/scripts/modifyFetch.py b/workflow/scripts/modifyFetch.py
index 8a330e539054c8592363bd84bb4e6a0871b750f4..bae8c2286ebe3353fa7db487753bd2de8381706b 100644
--- a/workflow/scripts/modifyFetch.py
+++ b/workflow/scripts/modifyFetch.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import argparse
 import pandas as pd
 
@@ -9,9 +11,9 @@ def get_args():
 
 def main():
     args = get_args()
-    fetch = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
-    fetch_filtered = fetch[fetch[2].str[-9:]==".fastq.gz"]
-    fetch_filtered.to_csv(args.fetchFile+"/fetch.txt",sep="\t",header=False,index=False)
+    fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
+    fetchFile_filtered = fetchFile[fetchFile[2].str[-9:]==".fastq.gz"]
+    fetchFile_filtered.to_csv(args.fetchFile+"/fetch.txt",sep="\t",header=False,index=False)
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
diff --git a/workflow/scripts/splitBag.sh b/workflow/scripts/splitBag.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3f6f6cdb610c684bdb57f666822dc0deb864fb04
--- /dev/null
+++ b/workflow/scripts/splitBag.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+for i in $(ls -d Replicate_*)
+do
+rsync -r $1/ ${i} --exclude=fetch.txt
+zip -r ${i}.zip ${i}
+done
\ No newline at end of file
diff --git a/workflow/scripts/splitFetch.py b/workflow/scripts/splitFetch.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8f60043be43a70ae570800f6edb117923d91810
--- /dev/null
+++ b/workflow/scripts/splitFetch.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+import argparse
+import pandas as pd
+import os
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--fetchFile',help="The fetch file from bdbag.zip.",required=True)
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = get_args()
+    fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
+    fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0)
+    replicateRID = fileFile.Replicate_RID.unique()
+    fetchArray = {i:fileFile.URI[(fileFile.Replicate_RID == i) & (fileFile.File_Type == "FastQ")] for i in replicateRID}
+    for i in replicateRID:
+        if not os.path.exists("Replicate_"+i):
+            os.mkdir("Replicate_"+i)
+        fetchFile[fetchFile[0].str.contains('|'.join(fetchArray[i]))].to_csv("Replicate_"+i+"/fetch.txt",sep="\t",header=False,index=False)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file