diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1f93dcef7aced3bfb7ed15c9faadfaa59f1d0d2e..af9500114677d3015545c24945032b2e49454873 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -664,7 +664,7 @@ integration_se:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
   - pytest -m completionMultiqc --filename SE_multiqc_data.json
   artifacts:
@@ -689,7 +689,7 @@ integration_pe:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
   - pytest -m completionMultiqc --filename PE_multiqc_data.json
   artifacts:
@@ -716,7 +716,7 @@ failAmbiguousSpecies:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failAmbiguousSpecies_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failAmbiguousSpecies_report.html
   retry:
     max: 0
     when:
@@ -731,7 +731,7 @@ failTrunkation:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
   retry:
     max: 0
     when:
@@ -746,7 +746,7 @@ failMismatchR1R2:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
   retry:
     max: 0
     when:
@@ -761,7 +761,7 @@ failUnexpectedMeta:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
   retry:
     max: 0
     when:
@@ -776,7 +776,7 @@ failFileStructure:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
   retry:
     max: 0
     when:
@@ -791,7 +791,7 @@ override_inputBag:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_multiqc_data.json \;
   - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./inputBagOverride_multiqc.html \;
   - pytest -m completionMultiqc --filename inputBagOverride_multiqc_data.json
@@ -816,7 +816,7 @@ override_fastq:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6  --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6  --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_multiqc_data.json \;
   - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./fastqOverride_multiqc.html \;
   - pytest -m completionMultiqc --filename fastqOverride_multiqc_data.json
@@ -841,7 +841,7 @@ override_species:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_multiqc_data.json \;
   - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./speciesOverride_multiqc.html \;
   - pytest -m completionMultiqc --filename speciesOverride_multiqc_data.json
@@ -866,7 +866,7 @@ override_stranded:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_multiqc_data.json \;
   - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./strandedOverride_multiqc.html \;
   - pytest -m completionMultiqc --filename strandedOverride_multiqc_data.json
@@ -891,7 +891,7 @@ override_spike:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
+  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
   - find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_multiqc_data.json \;
   - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./spikeOverride_multiqc.html \;
   - pytest -m completionMultiqc --filename spikeOverride_multiqc_data.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 947a384315a634c686794a8b1f6e0db2c531c242..3dcbde13917e1eaa3c43a60cfa75a86e478dd3f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 * Strandedness metadata "yes"/"no" changed to boolean "t"/"f" in data-hub, pipeline updated to handle (#70) ("yes"/"no" still acceptable for backwards compatibility)
 * Upload empty mRNA_QC entry if data error (#111)
 * Allow forcing of strandedness and spike (#100)
+* Modify repository structure to allow for use with XPACK-DNANEXUS
 
 **Background**
 * Add memory limit (75%) per thread for samtools sort (#108)
@@ -25,6 +26,7 @@
 * Change uploadOutputBag logic to reuse the hatrac file if it already exists (re-uses Output_Bag entry by reassigning Execution_Run RID) (#112)
 * Add new CI py tests for override and integration
 * Fix fastq file and species error status detail bug (#118)
+* Make compatible with XPACK-DNANEXUS
 
 *Known Bugs*
 * Override params (inputBag, fastq, species) aren't checked for integrity
diff --git a/README.md b/README.md
index 7b715d4b94ce92f8cd93806dd21bd04481f83b0a..04f49d7d7d3ebe739023ccfa30a099136e48b19d 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,8 @@ nextflow run workflow/rna-seq.nf --repRID Q-Y5JA --source production --deriva ./
 
 Cloud Compatibility:
 --------------------
-This pipeline is also capable of being run on AWS. To do so:
+This pipeline can also be run on AWS and DNAnexus. To do so:
+### [AWS](https://aws.amazon.com/)
 * Build a AWS batch queue and environment either manually or with [aws-cloudformantion](https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=Nextflow&templateURL=https://s3.amazonaws.com/aws-genomics-workflows/templates/nextflow/nextflow-aio.template.yaml)
 * Edit one of the aws configs in workflow/config/
   * Replace workDir with the S3 bucket generated
@@ -83,6 +84,14 @@ This pipeline is also capable of being run on AWS. To do so:
   * Change queue to the aws batch queue generated
 * The user must have awscli configured with an appropriate authentication (with `aws configure` and access keys) in the environment which nextflow will be run
 * Add `-profile` with the name aws config which was customized
+### DNAnexus (uses the [DNAnexus extension package for Nextflow (XPACK-DNANEXUS)](https://github.com/seqeralabs/xpack-dnanexus))
+* Follow the instructions from [XPACK-DNANEXUS](https://github.com/seqeralabs/xpack-dnanexus) on installing and authenticating (a valid license for the extension package must be obtained from Seqera Labs, as well as a subscription with DNAnexus)
+* Follow the instructions from [XPACK-DNANEXUS](https://github.com/seqeralabs/xpack-dnanexus) on launching runs. A template *json* file has been included ([dnanexusExample.json](docs/dnanexusExample.json)); a filled-in example is shown below the list
+  * `[version]` should be replaced with the pipeline version required (eg: `v2.0.0`)
+  * `[credential.json]` should be replaced with the location of the credential file output by authentication with Deriva
+  * `[cookies.txt]` should be replaced with the location of the cookies file output by authentication with Deriva for BDBag
+  * `[repRID]` should be replaced with the replicate RID to be analyzed (eg: `Q-Y5F6`)
+  * `[outDir]` should be replaced with the location to save local outputs of the pipeline
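+
+A filled-in launch config might look like the following (a sketch only; the version, paths, and RID are illustrative and should be replaced with your own):
+```json
+{
+  "pipeline_url": "https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq -r v2.0.0",
+  "args": "-profile dnanexus --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --outDir ./output",
+  "license": "$NXF_XPACK_LICENSE"
+}
+```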
 
 To generate your own references or new references:
 ------------------------------------------
diff --git a/cleanup.sh b/cleanup.sh
index aa289201c531fa4f4667a04f80fd015d2200e40c..0d61cfe0c4ae911824335431d8590eebfc07e70a 100644
--- a/cleanup.sh
+++ b/cleanup.sh
@@ -1,7 +1,7 @@
 rm *.out
 rm pipeline_trace*.txt*
-rm report*.html*
-rm timeline*.html*
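+# leading wildcards also catch prefixed reports (eg SE_report.html) written by CI runs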
+rm *report*.html*
+rm *timeline*.html*
 rm .nextflow*.log*
 rm -r .nextflow/
 rm -r work/
diff --git a/docs/dnanexusExample.json b/docs/dnanexusExample.json
new file mode 100644
index 0000000000000000000000000000000000000000..e03a6bccfba19f39afb59f62f711f428e6919248
--- /dev/null
+++ b/docs/dnanexusExample.json
@@ -0,0 +1,5 @@
+{
+	"pipeline_url": "https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq -r [version]",
+	"args": "-profile dnanexus --deriva [credential.json] --bdbag [cookies.txt] --repRID [repRID] --outDir [outDir],
+	"license": "$NXF_XPACK_LICENSE"
+}
diff --git a/nextflow.config b/nextflow.config
index 44f2df5255691ee4eaf11ecf9cee1af2fa27f743..288fc9d0f788b460ae1eddf8c0f32ecc5d035125 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,20 +1,23 @@
 profiles {
   standard {
-    includeConfig 'conf/biohpc.config'
+    includeConfig 'nextflowConf/biohpc.config'
   }
   biohpc {
-    includeConfig 'conf/biohpc.config'
+    includeConfig 'nextflowConf/biohpc.config'
   }
   biohpc_max {
-    includeConfig 'conf/biohpc_max.config'
+    includeConfig 'nextflowConf/biohpc_max.config'
   }
   aws_ondemand {
-    includeConfig 'conf/aws.config'
-    includeConfig 'conf/ondemand.config'
+    includeConfig 'nextflowConf/aws.config'
+    includeConfig 'nextflowConf/ondemand.config'
   }
   aws_spot {
-    includeConfig 'conf/aws.config'
-    includeConfig 'conf/spot.config'
+    includeConfig 'nextflowConf/aws.config'
+    includeConfig 'nextflowConf/spot.config'
+  }
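+  // runs via the XPACK-DNANEXUS executor; select with: nextflow run rna-seq.nf -profile dnanexus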
+  dnanexus {
+    includeConfig 'nextflowConf/dnanexus.config'
   }
 }
 
diff --git a/nextflowConf/.gitkeep b/nextflowConf/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/nextflowConf/Execution_Run_For_Output_Bag.json b/nextflowConf/Execution_Run_For_Output_Bag.json
new file mode 100755
index 0000000000000000000000000000000000000000..5945b1eb8c4c5e3ec862840f232ed7a8e386d770
--- /dev/null
+++ b/nextflowConf/Execution_Run_For_Output_Bag.json
@@ -0,0 +1,64 @@
+{
+  "bag": {
+    "bag_name": "Execution_Run_{rid}",
+    "bag_algorithms": [
+      "md5"
+    ],
+    "bag_archiver": "zip",
+    "bag_metadata": {}
+  },
+  "catalog": {
+    "catalog_id": "2",
+    "query_processors": [
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Execution_Run",
+          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/RID,Replicate_RID:=Replicate,Workflow_RID:=Workflow,Reference_Genone_RID:=Reference_Genome,Input_Bag_RID:=Input_Bag,Notes,Execution_Status,Execution_Status_Detail,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Workflow",
+          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Workflow?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Reference_Genome",
+          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Reference_Genome?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Input_Bag",
+          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Input_Bag?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "mRNA_QC",
+          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/(RID)=(RNASeq:mRNA_QC:Execution_Run)/RID,Execution_Run_RID:=Execution_Run,Replicate_RID:=Replicate,Paired_End,Strandedness,Median_Read_Length,Raw_Count,Final_Count,Notes,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "fetch",
+        "processor_params": {
+          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Output_Files",
+          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/(RID)=(RNASeq:Processed_File:Execution_Run)/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
+        }
+      },
+      {
+        "processor": "fetch",
+        "processor_params": {
+          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Input_Bag",
+          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/RNASeq:Input_Bag/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/nextflowConf/Replicate_For_Input_Bag.json b/nextflowConf/Replicate_For_Input_Bag.json
new file mode 100644
index 0000000000000000000000000000000000000000..508a0245051534fae39020792719b04d78947613
--- /dev/null
+++ b/nextflowConf/Replicate_For_Input_Bag.json
@@ -0,0 +1,97 @@
+{
+  "bag": {
+    "bag_name": "{rid}_inputBag",
+    "bag_algorithms": [
+      "md5"
+    ],
+    "bag_archiver": "zip"
+  },
+  "catalog": {
+    "query_processors": [
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Study",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Study_RID)=(RNASeq:Study:RID)/Study_RID:=RID,Internal_ID,Title,Summary,Overall_Design,GEO_Series_Accession_ID,GEO_Platform_Accession_ID,Funding,Pubmed_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment Antibodies",
+          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Antibodies:Experiment_RID)?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment Custom Metadata",
+          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Custom_Metadata:Experiment_RID)?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment Settings",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Replicate",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/RID,Study_RID,Experiment_RID,Biological_Replicate_Number,Technical_Replicate_Number,Specimen_RID,Collection_Date,Mapped_Reads,GEO_Sample_Accession_ID,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Specimen",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/S:=(Specimen_RID)=(Gene_Expression:Specimen:RID)/T:=left(Stage_ID)=(Vocabulary:Developmental_Stage:ID)/$S/RID,Title,Species,Stage_ID,Stage_Name:=T:Name,Stage_Detail,Assay_Type,Strain,Wild_Type,Sex,Passage,Phenotype,Cell_Line,Parent_Specimen,Upload_Notes,Preparation,Fixation,Embedding,Internal_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT,GUDMAP2_Accession_ID?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Specimen_Anatomical_Source",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Tissue:Specimen_RID)/RID,Specimen_RID,Tissue,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Specimen_Cell_Types",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Cell_Type:Specimen)/RID,Specimen_RID:=Specimen,Cell_Type,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Single Cell Metrics",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:Single_Cell_Metrics:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Reads_%28Millions%29,Reads%2FCell,Detected_Gene_Count,Genes%2FCell,UMI%2FCell,Estimated_Cell_Count,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "File",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Caption,File_Type,File_Name,URI,File_size,MD5,GEO_Archival_URL,dbGaP_Accession_ID,Processed,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT,Legacy_File_RID,GUDMAP_NGF_OID,GUDMAP_NGS_OID?limit=none"
+        }
+      },
+      {
+        "processor": "fetch",
+        "processor_params": {
+          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=FastQ/File_Name::ciregexp::%5B_.%5DR%5B12%5D%5C.fastq%5C.gz/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
+        }
+      }
+    ]
+  }
+}
diff --git a/workflow/conf/aws.config b/nextflowConf/aws.config
similarity index 100%
rename from workflow/conf/aws.config
rename to nextflowConf/aws.config
diff --git a/nextflowConf/bdbag.json b/nextflowConf/bdbag.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c2ab245e7d3470d8bb341136dac278360b4d99f
--- /dev/null
+++ b/nextflowConf/bdbag.json
@@ -0,0 +1,28 @@
+{
+  "fetch_config": {
+    "http": {
+      "http_cookies": {
+        "file_names": [
+            "*cookies.txt"
+        ],
+        "scan_for_cookie_files": true,
+        "search_paths": [
+            "."
+        ],
+        "search_paths_filter": "*cookies.txt"
+      }
+    },
+    "https": {
+      "http_cookies": {
+        "file_names": [
+            "*cookies.txt"
+        ],
+        "scan_for_cookie_files": true,
+        "search_paths": [
+            "."
+        ],
+        "search_paths_filter": "*cookies.txt"
+      }
+    }
+  }
+}
diff --git a/workflow/conf/biohpc.config b/nextflowConf/biohpc.config
similarity index 100%
rename from workflow/conf/biohpc.config
rename to nextflowConf/biohpc.config
diff --git a/nextflowConf/biohpc_local.config b/nextflowConf/biohpc_local.config
new file mode 100755
index 0000000000000000000000000000000000000000..d3a6c3a38689c234d65288c07d81d5b7286404c4
--- /dev/null
+++ b/nextflowConf/biohpc_local.config
@@ -0,0 +1,14 @@
+process {
+  executor = 'local'
+}
+
+singularity {
+  enabled = true
+  cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
+}
+
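+// BioHPC nodes reach external services through the UTSW web proxy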
+env {
+  http_proxy = 'http://proxy.swmed.edu:3128'
+  https_proxy = 'http://proxy.swmed.edu:3128'
+  all_proxy = 'http://proxy.swmed.edu:3128'
+}
diff --git a/workflow/conf/biohpc_max.config b/nextflowConf/biohpc_max.config
similarity index 100%
rename from workflow/conf/biohpc_max.config
rename to nextflowConf/biohpc_max.config
diff --git a/nextflowConf/dnanexus.config b/nextflowConf/dnanexus.config
new file mode 100755
index 0000000000000000000000000000000000000000..d82ff2bd07adf66caaa827a6f86d5970f20729b6
--- /dev/null
+++ b/nextflowConf/dnanexus.config
@@ -0,0 +1,143 @@
+params {
+  refSource = "datahub"
+}
+
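+// Executor notes: lightweight coordination steps run as plain 'dnanexus' executor
+// tasks (1 cpu / 1 GB), while compute-heavy steps request a DNAnexus machineType
+// sized to match their cpus/memory settings (eg mem1_ssd1_v2_x16 for 16 cpus / 32 GB).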
+process {
+  withName:trackStart {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:getBag {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:getData {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:parseMetadata {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:trimData {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:getRefInfer {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:downsampleData {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:alignSampleData {
+    machineType = 'mem3_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:inferMetadata {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:checkMetadata {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:getRef {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:alignData {
+    machineType = 'mem3_ssd1_v2_x32'
+    cpus = 32
+    memory = '256 GB'
+  }
+  withName:dedupData {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:countData {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:makeBigWig {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:fastqc {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:dataQC {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:aggrQC {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadInputBag {
+    executor = 'dnanexus'
+  }
+  withName:uploadExecutionRun {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadQC {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadProcessedFile {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadOutputBag {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:finalizeExecutionRun {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:failPreExecutionRun {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:failExecutionRun {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadQC_fail {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+}
+
+docker {
+  enabled = true
+}
diff --git a/nextflowConf/local.config b/nextflowConf/local.config
new file mode 100755
index 0000000000000000000000000000000000000000..0e4c34de957ced30e3923a9cf8fbb510d0dcd0a2
--- /dev/null
+++ b/nextflowConf/local.config
@@ -0,0 +1,7 @@
+process {
+  executor = 'local'
+}
+
+docker {
+  enabled = true
+}
diff --git a/nextflowConf/multiqc_config.yaml b/nextflowConf/multiqc_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed1375aed47a454394029e5057695b0c15babd8c
--- /dev/null
+++ b/nextflowConf/multiqc_config.yaml
@@ -0,0 +1,180 @@
+custom_logo: './bicf_logo.png'
+custom_logo_url: 'https://utsouthwestern.edu/labs/bioinformatics/'
+custom_logo_title: 'Bioinformatics Core Facility'
+
+report_header_info:
+  - Contact Email: 'bicf@utsouthwestern.edu'
+  - Application Type: 'RNA-Seq Analytic Pipeline for GUDMAP/RBK'
+  - Department: 'Bioinformatic Core Facility, Department of Bioinformatics, University of Texas Southwestern Medical Center'
+
+title: RNA-Seq Analytic Pipeline for GUDMAP/RBK
+
+report_comment: >
+  This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a>
+
+top_modules:
+  - fastqc:
+      name: 'Raw'
+      info: 'Replicate Raw fastq QC Results'
+  - cutadapt:
+      name: 'Trim'
+      info: 'Replicate Trim Adapter QC Results'
+  - hisat2:
+      name: 'Align'
+      info: 'Replicate Alignment QC Results'
+      path_filters:
+        - '*alignSummary*'
+  - picard:
+      name: 'Dedup'
+      info: 'Replicate Alignment Deduplication QC Results'
+  - rseqc:
+      name: 'Inner Distance'
+      info: 'Replicate Paired End Inner Distance Distribution Results'
+      path_filters:
+        - '*insertSize*'
+  - custom_content
+  - featureCounts:
+      name: 'Count'
+      info: 'Replicate Feature Count QC Results'
+  - hisat2:
+      name: 'Inference: Align'
+      info: 'Inference Alignment (1M downsampled reads) QC Results'
+      path_filters:
+        - '*alignSampleSummary*'
+  - rseqc:
+      name: 'Inference: Stranded'
+      info: '1M Downsampled Reads Strandedness Inference Results'
+      path_filters:
+        - '*infer_experiment*'
+
+report_section_order:
+    run:
+      order: 4000
+    rid:
+      order: 3000
+    meta:
+      order: 2000
+    ref:
+      order: 1000
+    software_versions:
+      order: -1000
+    software_references:
+      order: -2000
+
+skip_generalstats: true
+
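+# each custom section below is populated from a tsv matched by the sp: patterns at the end of this file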
+custom_data:
+    run:
+        file_format: 'tsv'
+        section_name: 'Run'
+        description: 'This is the run information'
+        plot_type: 'table'
+        pconfig:
+            id: 'run'
+            scale: false
+            format: '{}'
+        headers:
+            Session:
+                description: ''
+            Session ID:
+                description: 'Nextflow session ID'
+            Pipeline Version:
+                description: 'BICF pipeline version'
+            Input:
+                description: 'Input overrides'
+    rid:
+        file_format: 'tsv'
+        section_name: 'RID'
+        description: 'This is the identifying RIDs'
+        plot_type: 'table'
+        pconfig:
+            id: 'rid'
+            scale: false
+            format: '{}'
+        headers:
+            Replicate:
+                description: ''
+            Replicate RID:
+                description: 'Replicate RID'
+            Experiment RID:
+                description: 'Experiment RID'
+            Study RID:
+                description: 'Study RID'
+    meta:
+        file_format: 'tsv'
+        section_name: 'Metadata'
+        description: 'This is the comparison of inferred, submitter-provided, and calculated metadata'
+        plot_type: 'table'
+        pconfig:
+            id: 'meta'
+            scale: false
+            format: '{:,.0f}'
+        headers:
+            Source:
+                description: 'Metadata source'
+            Species:
+                description: 'Species'
+            Ends:
+                description: 'Single or paired end sequencing'
+            Stranded:
+                description: 'Stranded (forward/reverse) or unstranded library prep'
+            Spike-in:
+                description: 'ERCC spike in'
+            Raw Reads:
+                description: 'Number of reads from the sequencer'
+            Assigned Reads:
+                description: 'Final reads after filtering'
+            Median Read Length:
+                description: 'Median read length'
+            Median TIN:
+                description: 'Median transcript integrity number'
+
+    ref:
+        file_format: 'tsv'
+        section_name: 'Reference'
+        description: 'This is the reference version information'
+        plot_type: 'table'
+        pconfig:
+            id: 'ref'
+            scale: false
+            format: '{}'
+        headers:
+            Species:
+                description: 'Reference species'
+            Genome Reference Consortium Build:
+                description: 'Reference source build'
+            Genome Reference Consortium Patch:
+                description: 'Reference source patch version'
+            GENCODE Annotation Release:
+                description: 'Annotation release version'
+    tin:
+        file_format: 'tsv'
+        section_name: 'TIN'
+        description: 'This is the distribution of TIN values calculated by the tool RSeQC'
+        plot_type: 'bargraph'
+        pconfig:
+            id: 'tin'
+        headers:
+            - chrom
+            - 1 - 10
+            - 11 - 20
+            - 21 - 30
+            - 31 - 40
+            - 41 - 50
+            - 51 - 60
+            - 61 - 70
+            - 71 - 80
+            - 81 - 90
+            - 91 - 100
+
+sp:
+    run:
+        fn: "run.tsv"
+    rid:
+        fn: 'rid.tsv'
+    meta:
+        fn: 'metadata.tsv'
+    ref:
+        fn: 'reference.tsv'
+    tin:
+        fn: '*_tin.hist.tsv'
diff --git a/workflow/conf/ondemand.config b/nextflowConf/ondemand.config
similarity index 100%
rename from workflow/conf/ondemand.config
rename to nextflowConf/ondemand.config
diff --git a/workflow/conf/spot.config b/nextflowConf/spot.config
similarity index 100%
rename from workflow/conf/spot.config
rename to nextflowConf/spot.config
diff --git a/rna-seq.nf b/rna-seq.nf
index 555f711ed3abe7a621be3de603c12a7cc56b3c13..30fb31a9f73071647582ece33e9c6c4ef945730b 100644
--- a/rna-seq.nf
+++ b/rna-seq.nf
@@ -9,15 +9,15 @@
 //  ########  ####  ######  ##
 
 // Define input variables
-params.deriva = "${baseDir}/../test_data/auth/credential.json"
-params.bdbag = "${baseDir}/../test_data/auth/cookies.txt"
+params.deriva = "${baseDir}/test_data/auth/credential.json"
+params.bdbag = "${baseDir}/test_data/auth/cookies.txt"
 //params.repRID = "16-1ZX4"
 params.repRID = "Q-Y5F6"
 params.source = "dev"
 params.refMoVersion = "38.p6.vM25"
 params.refHuVersion = "38.p13.v36"
 params.refERCCVersion = "92"
-params.outDir = "${baseDir}/../output"
+params.outDir = "${baseDir}/output"
 params.upload = false
 params.email = ""
 params.track = false
@@ -72,8 +72,9 @@ spikeForce = params.spikeForce
 email = params.email
 
 // Define fixed files and variables
-replicateExportConfig = Channel.fromPath("${baseDir}/conf/Replicate_For_Input_Bag.json")
-executionRunExportConfig = Channel.fromPath("${baseDir}/conf/Execution_Run_For_Output_Bag.json")
+bdbagConfig = Channel.fromPath("${baseDir}/workflow/conf/bdbag.json")
+replicateExportConfig = Channel.fromPath("${baseDir}/workflow/conf/Replicate_For_Input_Bag.json")
+executionRunExportConfig = Channel.fromPath("${baseDir}/workflow/conf/Execution_Run_For_Output_Bag.json")
 if (params.source == "dev") {
   source = "dev.gudmap.org"
 } else if (params.source == "staging") {
@@ -87,37 +88,37 @@ if (params.refSource == "biohpc") {
   referenceBase = "www.gudmap.org"
 }
 referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"])
-multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml")
-bicfLogo = Channel.fromPath("${baseDir}/../docs/bicf_logo.png")
-softwareReferences = Channel.fromPath("${baseDir}/../docs/software_references_mqc.yaml")
-softwareVersions = Channel.fromPath("${baseDir}/../docs/software_versions_mqc.yaml")
+multiqcConfig = Channel.fromPath("${baseDir}/workflow/conf/multiqc_config.yaml")
+bicfLogo = Channel.fromPath("${baseDir}/docs/bicf_logo.png")
+softwareReferences = Channel.fromPath("${baseDir}/docs/software_references_mqc.yaml")
+softwareVersions = Channel.fromPath("${baseDir}/docs/software_versions_mqc.yaml")
 
 // Define script files
-script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbag_fetch.sh")
-script_parseMeta = Channel.fromPath("${baseDir}/scripts/parse_meta.py")
-script_inferMeta = Channel.fromPath("${baseDir}/scripts/infer_meta.sh")
-script_refDataInfer = Channel.fromPath("${baseDir}/scripts/extract_ref_data.py")
-script_refData = Channel.fromPath("${baseDir}/scripts/extract_ref_data.py")
-script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R")
-script_convertGeneSymbols = Channel.fromPath("${baseDir}/scripts/convertGeneSymbols.R")
-script_tinHist = Channel.fromPath("${baseDir}/scripts/tin_hist.py")
-script_uploadInputBag = Channel.fromPath("${baseDir}/scripts/upload_input_bag.py")
-script_uploadExecutionRun_uploadExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadExecutionRun_finalizeExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadExecutionRun_failPreExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadExecutionRun_failExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadQC = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
-script_uploadQC_fail = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
-script_uploadOutputBag = Channel.fromPath("${baseDir}/scripts/upload_output_bag.py")
-script_deleteEntry_uploadQC = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
-script_deleteEntry_uploadQC_fail = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
-script_deleteEntry_uploadProcessedFile = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
+script_bdbagFetch = Channel.fromPath("${baseDir}/workflow/scripts/bdbag_fetch.sh")
+script_parseMeta = Channel.fromPath("${baseDir}/workflow/scripts/parse_meta.py")
+script_inferMeta = Channel.fromPath("${baseDir}/workflow/scripts/infer_meta.sh")
+script_refDataInfer = Channel.fromPath("${baseDir}/workflow/scripts/extract_ref_data.py")
+script_refData = Channel.fromPath("${baseDir}/workflow/scripts/extract_ref_data.py")
+script_calculateTPM = Channel.fromPath("${baseDir}/workflow/scripts/calculateTPM.R")
+script_convertGeneSymbols = Channel.fromPath("${baseDir}/workflow/scripts/convertGeneSymbols.R")
+script_tinHist = Channel.fromPath("${baseDir}/workflow/scripts/tin_hist.py")
+script_uploadInputBag = Channel.fromPath("${baseDir}/workflow/scripts/upload_input_bag.py")
+script_uploadExecutionRun_uploadExecutionRun = Channel.fromPath("${baseDir}/workflow/scripts/upload_execution_run.py")
+script_uploadExecutionRun_finalizeExecutionRun = Channel.fromPath("${baseDir}/workflow/scripts/upload_execution_run.py")
+script_uploadExecutionRun_failPreExecutionRun = Channel.fromPath("${baseDir}/workflow/scripts/upload_execution_run.py")
+script_uploadExecutionRun_failExecutionRun = Channel.fromPath("${baseDir}/workflow/scripts/upload_execution_run.py")
+script_uploadQC = Channel.fromPath("${baseDir}/workflow/scripts/upload_qc.py")
+script_uploadQC_fail = Channel.fromPath("${baseDir}/workflow/scripts/upload_qc.py")
+script_uploadOutputBag = Channel.fromPath("${baseDir}/workflow/scripts/upload_output_bag.py")
+script_deleteEntry_uploadQC = Channel.fromPath("${baseDir}/workflow/scripts/delete_entry.py")
+script_deleteEntry_uploadQC_fail = Channel.fromPath("${baseDir}/workflow/scripts/delete_entry.py")
+script_deleteEntry_uploadProcessedFile = Channel.fromPath("${baseDir}/workflow/scripts/delete_entry.py")
 
 /*
  * trackStart: track start of pipeline
  */
 process trackStart {
-  container 'docker://gudmaprbk/gudmap-rbk_base:1.0.0'
+  container 'gudmaprbk/gudmap-rbk_base:1.0.0'
   script:
     """
     hostname
@@ -211,8 +212,7 @@ process getBag {
     deriva-download-cli ${source} --catalog 2 ${replicateExportConfig} . rid=${repRID}
     echo -e "LOG: fetched" >> ${repRID}.getBag.log
 
-    name=\$(ls *.zip)
-    name=\$(basename \${name} | cut -d "." -f1)
+    name=${repRID}_inputBag
     yr=\$(date +'%Y')
     mn=\$(date +'%m')
     dy=\$(date +'%d')
@@ -240,6 +240,7 @@ process getData {
   tag "${repRID}"
 
   input:
+    path bdbagConfig
     path script_bdbagFetch
     path cookies, stageAs: "deriva-cookies.txt" from bdbag
     path inputBag from inputBag_getData
@@ -256,12 +257,6 @@ process getData {
     hostname > ${repRID}.getData.log
     ulimit -a >> ${repRID}.getData.log
 
-    # link deriva cookie for authentication
-    echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log
-    mkdir -p ~/.bdbag
-    ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt
-    echo -e "LOG: linked" >> ${repRID}.getData.log
-
     # get bag basename
     replicate=\$(basename "${inputBag}")
     echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log
@@ -273,10 +268,9 @@ process getData {
 
     # bag fetch fastq's only and rename by repRID
     echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
-    sh ${script_bdbagFetch} \${replicate::-13} ${repRID}
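+    # bdbag_fetch.sh is assumed to print the number of fetched fastq files on stdout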
+    fastqCount=\$(sh ${script_bdbagFetch} \${replicate::-13} ${repRID})
     echo -e "LOG: fetched" >> ${repRID}.getData.log
-    
-    fastqCount=\$(ls *.fastq.gz | wc -l)
+
     if [ "\${fastqCount}" == "0" ]
     then
       touch dummy.R1.fastq.gz
@@ -790,20 +784,20 @@ process getRefInfer {
 
     # retrieve the appropriate reference from the appropriate location
     echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
-    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
+    if [ "${referenceBase}" == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
     then
       unzip \${references}.zip
       mv \$(basename \${references})/data/* .
-    elif [ params.refSource == "datahub" ]
+    elif [ "${params.refSource}" == "datahub" ]
     then
       GRCv=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f1)
       GRCp=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f2)
       GENCODE=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f3)
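+      # restrict the catalog query to the non-spike-in reference build (Used_Spike_Ins=false)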
       if [ "${refName}" != "ERCC" ]
       then
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
+        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=false')
       else
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version=${refName}${refERCCVersion}/Annotation_Version=${refName}${refERCCVersion}')
+        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${refName}${refERCCVersion}'/Annotation_Version='${refName}${refERCCVersion}'/Used_Spike_Ins=false')
       fi
       curl --request GET \${query} > refQuery.json
       refURL=\$(python ${script_refDataInfer} --returnParam URL)
@@ -1341,6 +1335,12 @@ process uploadInputBag {
     hostname > ${repRID}.uploadInputBag.log
     ulimit -a >> ${repRID}.uploadInputBag.log
 
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadInputBag.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadInputBag.log
+
     yr=\$(date +'%Y')
     mn=\$(date +'%m')
     dy=\$(date +'%d')
@@ -1418,6 +1418,12 @@ process uploadExecutionRun {
     hostname > ${repRID}.uploadExecutionRun.log
     ulimit -a >> ${repRID}.uploadExecutionRun.log
 
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadExecutionRun.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadExecutionRun.log
+
     echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
     workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
     workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
@@ -1555,13 +1561,18 @@ process getRef {
       echo -e "LOG: grabbing reference files from local (BioHPC)" >> ${repRID}.getRef.log
       unzip \${reference}.zip
       mv \$(basename \${reference})/data/* .
-    elif [ arams.refSource == "datahub" ]
+    elif [ "${params.refSource}" == "datahub" ]
     then
       echo -e "LOG: grabbing reference files from datahub" >> ${repRID}.getRef.log
       GRCv=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f1)
       GRCp=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f2)
       GENCODE=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f3)
-      query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
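+      # pick the reference build whose Used_Spike_Ins flag matches the detected/forced spike value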
+      if [ "${spike}" == "true" ]
+      then
+        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=true')
+      else
+        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=false')
+      fi
       curl --request GET \${query} > refQuery.json
       refURL=\$(python ${script_refData} --returnParam URL)
       loc=\$(dirname \${refURL})
@@ -1740,7 +1751,7 @@ dedupBam.into {
 */
 process makeBigWig {
   tag "${repRID}"
-  publishDir "${outDir}/bigwig", mode: 'copy', pattern: "${repRID}.bw"
+  publishDir "${outDir}/bigwig", mode: 'copy', pattern: "${repRID}_sorted.deduped.bw"
 
   input:
     tuple path (bam), path (bai) from dedupBam_makeBigWig
@@ -2122,6 +2133,12 @@ process uploadQC {
     hostname > ${repRID}.uploadQC.log
     ulimit -a >> ${repRID}.uploadQC.log
 
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadQC.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadQC.log
+
     if [ "${ends}" == "pe" ]
     then
       end="Paired End"
@@ -2191,8 +2208,14 @@ process uploadProcessedFile {
 
   script:
     """
-    hostname > ${repRID}.outputBag.log
-    ulimit -a >> ${repRID}.outputBag.log
+    hostname > ${repRID}.uploadProcessedFile.log
+    ulimit -a >> ${repRID}.uploadProcessedFile.log
+
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadProcessedFile.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadProcessedFile.log
 
     mkdir -p ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
     cp ${bam} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
@@ -2211,14 +2234,14 @@ process uploadProcessedFile {
       do
         python3 ${script_deleteEntry_uploadProcessedFile} -r \${rid} -t Processed_File -o ${source} -c \${cookie}
       done
-      echo LOG: all old processed file RIDs deleted >> ${repRID}.uploadQC.log
+      echo LOG: all old processed file RIDs deleted >> ${repRID}.uploadProcessedFile.log
     fi
 
     deriva-upload-cli --catalog 2 --token \${cookie:9} ${source} ./deriva
-    echo LOG: processed files uploaded >> ${repRID}.outputBag.log
+    echo LOG: processed files uploaded >> ${repRID}.uploadProcessedFile.log
 
     deriva-download-cli --catalog 2 --token \${cookie:9} ${source} ${executionRunExportConfig} . rid=${executionRunRID}
-    echo LOG: execution run bag downloaded >> ${repRID}.outputBag.log
+    echo LOG: execution run bag downloaded >> ${repRID}.uploadProcessedFile.log
 
     echo -e "### Run Details" >> runDetails.md
     echo -e "**Workflow URL:** https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq" >> runDetails.md
@@ -2236,7 +2259,7 @@ process uploadProcessedFile {
     echo -e "**Genome Assembly Version:** \${genome} patch \${patch}" >> runDetails.md
     echo -e "**Annotation Version:** GENCODE release \${annotation}" >> runDetails.md
     echo -e "**Run ID:** ${repRID}" >> runDetails.md
-    echo LOG: runDetails.md created >> ${repRID}.outputBag.log
+    echo LOG: runDetails.md created >> ${repRID}.uploadProcessedFile.log
 
     unzip Execution_Run_${executionRunRID}.zip
     yr=\$(date +'%Y')
@@ -2250,7 +2273,7 @@ process uploadProcessedFile {
     cp ${multiqcJSON} \${loc}
 
     bdbag ./${repRID}_Output_Bag/ --update --archiver zip --debug
-    echo LOG: output bag created >> ${repRID}.outputBag.log
+    echo LOG: output bag created >> ${repRID}.uploadProcessedFile.log
     """
 }
 
@@ -2288,6 +2311,12 @@ process uploadOutputBag {
     hostname > ${repRID}.uploadOutputBag.log
     ulimit -a >> ${repRID}.uploadOutputBag.log
 
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadOutputBag.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadOutputBag.log
+
     yr=\$(date +'%Y')
     mn=\$(date +'%m')
     dy=\$(date +'%d')
@@ -2606,6 +2635,12 @@ process uploadQC_fail {
     hostname > ${repRID}.uploadQC.log
     ulimit -a >> ${repRID}.uploadQC.log
 
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadQC.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadQC.log
+
     cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
     cookie=\${cookie:11:-1}
 
diff --git a/workflow/conf/bdbag.json b/workflow/conf/bdbag.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c2ab245e7d3470d8bb341136dac278360b4d99f
--- /dev/null
+++ b/workflow/conf/bdbag.json
@@ -0,0 +1,28 @@
+{
+  "fetch_config": {
+    "http": {
+      "http_cookies": {
+        "file_names": [
+            "*cookies.txt"
+        ],
+        "scan_for_cookie_files": true,
+        "search_paths": [
+            "."
+        ],
+        "search_paths_filter": "*cookies.txt"
+      }
+    },
+    "https": {
+      "http_cookies": {
+        "file_names": [
+            "*cookies.txt"
+        ],
+        "scan_for_cookie_files": true,
+        "search_paths": [
+            "."
+        ],
+        "search_paths_filter": "*cookies.txt"
+      }
+    }
+  }
+}
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
deleted file mode 100644
index 44f2df5255691ee4eaf11ecf9cee1af2fa27f743..0000000000000000000000000000000000000000
--- a/workflow/nextflow.config
+++ /dev/null
@@ -1,130 +0,0 @@
-profiles {
-  standard {
-    includeConfig 'conf/biohpc.config'
-  }
-  biohpc {
-    includeConfig 'conf/biohpc.config'
-  }
-  biohpc_max {
-    includeConfig 'conf/biohpc_max.config'
-  }
-  aws_ondemand {
-    includeConfig 'conf/aws.config'
-    includeConfig 'conf/ondemand.config'
-  }
-  aws_spot {
-    includeConfig 'conf/aws.config'
-    includeConfig 'conf/spot.config'
-  }
-}
-
-process {
-  withName:getBag {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:getData {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:parseMetadata {
-    container = 'gudmaprbk/python3:1.0.0'
-  }
-  withName:trimData {
-    container = 'gudmaprbk/trimgalore0.6.5:1.0.0'
-  }
-  withName:getRefInfer {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:downsampleData {
-    container = 'gudmaprbk/seqtk1.3:1.0.0'
-  }
-  withName:alignSampleData {
-    container = 'gudmaprbk/hisat2.2.1:1.0.0'
-  }
-  withName:inferMetadata {
-    container = 'gudmaprbk/rseqc4.0.0:1.0.0'
-  }
-  withName:checkMetadata {
-    container = 'gudmaprbk/gudmap-rbk_base:1.0.0'
-  }
-  withName:getRef {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:alignData {
-    container = 'gudmaprbk/hisat2.2.1:1.0.0'
-  }
-  withName:dedupData {
-    container = 'gudmaprbk/picard2.23.9:1.0.0'
-  }
-  withName:countData {
-    container = 'gudmaprbk/subread2.0.1:1.0.0'
-  }
-  withName:makeBigWig {
-    container = 'gudmaprbk/deeptools3.5.0:1.0.0'
-  }
-  withName:fastqc {
-    container = 'gudmaprbk/fastqc0.11.9:1.0.0'
-  }
-  withName:dataQC {
-    container = 'gudmaprbk/rseqc4.0.0:1.0.0'
-  }
-  withName:aggrQC {
-    container = 'gudmaprbk/multiqc1.9:1.0.0'
-  }
-  withName:uploadInputBag {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:uploadExecutionRun {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:uploadQC {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:uploadProcessedFile {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:uploadOutputBag {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:finalizeExecutionRun {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:failPreExecutionRun {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:failExecutionRun {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:uploadQC_fail {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-}
-
-trace {
-  enabled = false
-  file = 'trace.txt'
-  fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
-}
-
-timeline {
-  enabled = false
-  file = 'timeline.html'
-}
-
-report {
-  enabled = false
-  file = 'report.html'
-}
-
-tower {
-  accessToken = '3ade8f325d4855434b49aa387421a44c63e3360f'
-  enabled = true
-}
-
-manifest {
-  name = 'gudmap_rbk/rna-seq'
-  homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
-  description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
-  mainScript = 'rna-seq.nf'
-  version = 'v2.0.0rc01'
-  nextflowVersion = '>=19.09.0'
-}
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
new file mode 120000
index 0000000000000000000000000000000000000000..2984ceedc04dab65543aa5707372e679b4d7653a
--- /dev/null
+++ b/workflow/nextflow.config
@@ -0,0 +1 @@
+../nextflow.config
\ No newline at end of file
diff --git a/workflow/nextflowConf b/workflow/nextflowConf
new file mode 120000
index 0000000000000000000000000000000000000000..8e5c4cf73c241a65e5274d161bba48c582b1c9d3
--- /dev/null
+++ b/workflow/nextflowConf
@@ -0,0 +1 @@
+../nextflowConf/
\ No newline at end of file
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
deleted file mode 100644
index 555f711ed3abe7a621be3de603c12a7cc56b3c13..0000000000000000000000000000000000000000
--- a/workflow/rna-seq.nf
+++ /dev/null
@@ -1,2650 +0,0 @@
-#!/usr/bin/env nextflow
-
-//  ########  ####  ######  ########
-//  ##     ##  ##  ##    ## ##
-//  ##     ##  ##  ##       ##
-//  ########   ##  ##       ######
-//  ##     ##  ##  ##       ##
-//  ##     ##  ##  ##    ## ##
-//  ########  ####  ######  ##
-
-// Define input variables
-params.deriva = "${baseDir}/../test_data/auth/credential.json"
-params.bdbag = "${baseDir}/../test_data/auth/cookies.txt"
-//params.repRID = "16-1ZX4"
-params.repRID = "Q-Y5F6"
-params.source = "dev"
-params.refMoVersion = "38.p6.vM25"
-params.refHuVersion = "38.p13.v36"
-params.refERCCVersion = "92"
-params.outDir = "${baseDir}/../output"
-params.upload = false
-params.email = ""
-params.track = false
-
-
-// Define override input variable
-params.refSource = "biohpc"
-params.inputBagForce = ""
-params.fastqsForce = ""
-params.speciesForce = ""
-params.strandedForce = ""
-params.spikeForce = ""
-
-// Define tracking input variables
-params.ci = false
-params.dev = true
-
-
-// Parse input variables
-deriva = Channel
-  .fromPath(params.deriva)
-  .ifEmpty { exit 1, "deriva credential file not found: ${params.deriva}" }
-deriva.into {
-  deriva_getBag
-  deriva_getRefInfer
-  deriva_getRef
-  deriva_uploadInputBag
-  deriva_uploadExecutionRun
-  deriva_uploadQC
-  deriva_uploadQC_fail
-  deriva_uploadProcessedFile
-  deriva_uploadOutputBag
-  deriva_finalizeExecutionRun
-  deriva_failPreExecutionRun
-  deriva_failExecutionRun
-}
-bdbag = Channel
-  .fromPath(params.bdbag)
-  .ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" }
-repRID = params.repRID
-refMoVersion = params.refMoVersion
-refHuVersion = params.refHuVersion
-refERCCVersion = params.refERCCVersion
-outDir = params.outDir
-logsDir = "${outDir}/Logs"
-upload = params.upload
-inputBagForce = params.inputBagForce
-fastqsForce = params.fastqsForce
-speciesForce = params.speciesForce
-strandedForce = params.strandedForce
-spikeForce = params.spikeForce
-email = params.email
-
-// Define fixed files and variables
-replicateExportConfig = Channel.fromPath("${baseDir}/conf/Replicate_For_Input_Bag.json")
-executionRunExportConfig = Channel.fromPath("${baseDir}/conf/Execution_Run_For_Output_Bag.json")
-if (params.source == "dev") {
-  source = "dev.gudmap.org"
-} else if (params.source == "staging") {
-  source = "staging.gudmap.org"
-} else if (params.source == "production") {
-  source = "www.gudmap.org"
-}
-if (params.refSource == "biohpc") {
-  referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references/new"
-} else if (params.refSource == "datahub") {
-  referenceBase = "www.gudmap.org"
-}
-referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"])
-multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml")
-bicfLogo = Channel.fromPath("${baseDir}/../docs/bicf_logo.png")
-softwareReferences = Channel.fromPath("${baseDir}/../docs/software_references_mqc.yaml")
-softwareVersions = Channel.fromPath("${baseDir}/../docs/software_versions_mqc.yaml")
-
-// Define script files
-script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbag_fetch.sh")
-script_parseMeta = Channel.fromPath("${baseDir}/scripts/parse_meta.py")
-script_inferMeta = Channel.fromPath("${baseDir}/scripts/infer_meta.sh")
-script_refDataInfer = Channel.fromPath("${baseDir}/scripts/extract_ref_data.py")
-script_refData = Channel.fromPath("${baseDir}/scripts/extract_ref_data.py")
-script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R")
-script_convertGeneSymbols = Channel.fromPath("${baseDir}/scripts/convertGeneSymbols.R")
-script_tinHist = Channel.fromPath("${baseDir}/scripts/tin_hist.py")
-script_uploadInputBag = Channel.fromPath("${baseDir}/scripts/upload_input_bag.py")
-script_uploadExecutionRun_uploadExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadExecutionRun_finalizeExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadExecutionRun_failPreExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadExecutionRun_failExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
-script_uploadQC = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
-script_uploadQC_fail = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
-script_uploadOutputBag = Channel.fromPath("${baseDir}/scripts/upload_output_bag.py")
-script_deleteEntry_uploadQC = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
-script_deleteEntry_uploadQC_fail = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
-script_deleteEntry_uploadProcessedFile = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
-
-/*
- * trackStart: track start of pipeline
- */
-process trackStart {
-  container 'docker://gudmaprbk/gudmap-rbk_base:1.0.0'
-  script:
-    """
-    hostname
-    ulimit -a
-
-    curl -H 'Content-Type: application/json' -X PUT -d \
-      '{ \
-        "sessionId": "${workflow.sessionId}", \
-        "pipeline": "gudmap.rbk_rnaseq", \
-        "start": "${workflow.start}", \
-        "repRID": "${repRID}", \
-        "astrocyte": false, \
-        "status": "started", \
-        "nextflowVersion": "${workflow.nextflow.version}", \
-        "pipelineVersion": "${workflow.manifest.version}", \
-        "ci": ${params.ci}, \
-        "dev": ${params.dev} \
-      }' \
-      "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
-
-    if [ ${params.track} == true ]
-    then
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        '{ \
-          "ID": "${workflow.sessionId}", \
-          "repRID": "${repRID}", \
-          "PipelineVersion": "${workflow.manifest.version}", \
-          "Server": "${params.source}", \
-          "Queued": "NA", \
-          "CheckedOut": "NA", \
-          "Started": "${workflow.start}" \
-        }' \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
-    fi
-    """
-}
-
-log.info """\
-====================================
-BICF RNA-seq Pipeline for GUDMAP/RBK
-====================================
-Replicate RID          : ${params.repRID}
-Source Server          : ${params.source}
-Mouse Reference Version: ${params.refMoVersion}
-Human Reference Version: ${params.refHuVersion}
-ERCC Reference Version : ${params.refERCCVersion}
-Reference source       : ${params.refSource}
-Output Directory       : ${params.outDir}
-Upload                 : ${upload}
-Track                  : ${params.track}
-------------------------------------
-Nextflow Version       : ${workflow.nextflow.version}
-Pipeline Version       : ${workflow.manifest.version}
-Session ID             : ${workflow.sessionId}
-------------------------------------
-CI                     : ${params.ci}
-Development            : ${params.dev}
-------------------------------------
-"""
-
-/*
- * getBag: download input bag
- */
-process getBag {
-  tag "${repRID}"
-  publishDir "${outDir}/inputBag", mode: 'copy', pattern: "*_inputBag_*.zip"
-
-  input:
-    path credential, stageAs: "credential.json" from deriva_getBag
-    path replicateExportConfig
-
-  output:
-    path ("*.zip") into bag
-
-  when:
-    inputBagForce == ""
-
-  script:
-    """
-    hostname > ${repRID}.getBag.log
-    ulimit -a >> ${repRID}.getBag.log
-
-    # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.getBag.log
-    mkdir -p ~/.deriva
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.getBag.log
-
-    # deriva-download replicate RID
-    echo -e "LOG: fetching bag for ${repRID} in GUDMAP" >> ${repRID}.getBag.log
-    deriva-download-cli ${source} --catalog 2 ${replicateExportConfig} . rid=${repRID}
-    echo -e "LOG: fetched" >> ${repRID}.getBag.log
-
-    name=\$(ls *.zip)
-    name=\$(basename \${name} | cut -d "." -f1)
-    yr=\$(date +'%Y')
-    mn=\$(date +'%m')
-    dy=\$(date +'%d')
-    mv \${name}.zip \${name}_\${yr}\${mn}\${dy}.zip
-    """
-}
-
-// Set inputBag to downloaded or forced input
-if (inputBagForce != "") {
-  inputBag = Channel
-    .fromPath(inputBagForce)
-    .ifEmpty { exit 1, "override inputBag file not found: ${inputBagForce}" }
-} else {
-  inputBag = bag
-}
-inputBag.into {
-  inputBag_getData
-  inputBag_uploadInputBag
-}
-
-/*
- * getData: fetch replicate files from consortium with downloaded bdbag.zip
- */
-process getData {
-  tag "${repRID}"
-
-  input:
-    path script_bdbagFetch
-    path cookies, stageAs: "deriva-cookies.txt" from bdbag
-    path inputBag from inputBag_getData
-
-  output:
-    path ("*.R{1,2}.fastq.gz") into fastqs
-    path ("**/File.csv") into fileMeta
-    path ("**/Experiment Settings.csv") into experimentSettingsMeta
-    path ("**/Experiment.csv") into experimentMeta
-    path "fastqCount.csv" into fastqCount_fl
-
-  script:
-    """
-    hostname > ${repRID}.getData.log
-    ulimit -a >> ${repRID}.getData.log
-
-    # link deriva cookie for authentication
-    echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log
-    mkdir -p ~/.bdbag
-    ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt
-    echo -e "LOG: linked" >> ${repRID}.getData.log
-
-    # get bag basename
-    replicate=\$(basename "${inputBag}")
-    echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log
-
-    # unzip bag
-    echo -e "LOG: unzipping replicate bag" >> ${repRID}.getData.log
-    unzip ${inputBag}
-    echo -e "LOG: unzipped" >> ${repRID}.getData.log
-
-    # bag fetch fastq's only and rename by repRID
-    echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
-    sh ${script_bdbagFetch} \${replicate::-13} ${repRID}
-    echo -e "LOG: fetched" >> ${repRID}.getData.log
-    
-    fastqCount=\$(ls *.fastq.gz | wc -l)
-    if [ "\${fastqCount}" == "0" ]
-    then
-      touch dummy.R1.fastq.gz
-    fi
-    echo "\${fastqCount}" > fastqCount.csv
-    """
-}
-
-// Split fastq count into channel
-fastqCount = Channel.create()
-fastqCount_fl.splitCsv(sep: ",", header: false).separate(
-  fastqCount
-)
-
-// Set raw fastq to downloaded or forced input and replicate them for multiple process inputs
-if (fastqsForce != "") {
-  Channel
-    .fromPath(fastqsForce)
-    .ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" }
-    .collect().into {
-      fastqs_parseMetadata
-      fastqs_fastqc
-    }
-} else {
-  fastqs.collect().into {
-    fastqs_parseMetadata
-    fastqs_fastqc
-  }
-}
-
-/*
- * parseMetadata: parses metadata to extract experiment parameters
-*/
-process parseMetadata {
-  tag "${repRID}"
-
-  input:
-    path script_parseMeta
-    path file from fileMeta
-    path experimentSettings, stageAs: "ExperimentSettings.csv" from experimentSettingsMeta
-    path experiment from experimentMeta
-    path (fastq) from fastqs_parseMetadata.collect()
-    val fastqCount
-
-  output:
-    path "design.csv" into metadata_fl
-    path "fastqError.csv" into fastqError_fl
-
-  script:
-    """
-    hostname > ${repRID}.parseMetadata.log
-    ulimit -a >> ${repRID}.parseMetadata.log
-
-    # check replicate RID metadata
-    rep=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p repRID)
-    echo -e "LOG: replicate RID metadata parsed: \${rep}" >> ${repRID}.parseMetadata.log
-
-    # get experiment RID metadata
-    exp=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p expRID)
-    echo -e "LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.log
-
-    # get study RID metadata
-    study=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p studyRID)
-    echo -e "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log
-
-    # get endedness metadata
-    endsRaw=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta)
-    echo -e "LOG: endedness metadata parsed: \${endsRaw}" >> ${repRID}.parseMetadata.log
-    if [ "\${endsRaw}" == "Single End" ]
-    then
-      endsMeta="se"
-    elif [ "\${endsRaw}" == "Paired End" ]
-    then
-      endsMeta="pe"
-    elif [ "\${endsRaw}" == "Single Read" ]
-    # "Single Read" depreciated as of Jan 2021, this option is present for backwards compatibility
-    then
-      endsMeta="se"
-    elif [ "\${endsRaw}" == "nan" ]
-    then
-      endsRaw="_No value_"
-      endsMeta="NA"
-    fi
-
-    # manually get endedness
-    if [ "${fastqCount}" == "1" ]
-    then
-      endsManual="se"
-    else
-      endsManual="pe"
-    fi
-    echo -e "LOG: endedness manually detected: ${fastqCount}" >> ${repRID}.parseMetadata.log
-
-    # get strandedness metadata
-    stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p stranded)
-    echo -e "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log
-    if [ "\${stranded}" == "nan" ]
-    then
-      stranded="_No value_"
-    fi
-
-    # get spike-in metadata
-    spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike)
-    echo -e "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log
-    if [ "\${spike}" == "nan" ]
-    then
-      spike="_No value_"
-    fi
-    if [ "\${spike}" == "f" ]
-    then
-      spike="false"
-    elif [ "\${spike}" == "t" ]
-    then
-      spike="true"
-    elif [ "\${spike}" == "no" ]
-    # "yes"/"no" depreciated as of Jan 2021, this option is present for backwards compatibility
-    then
-      spike="false"
-    elif [ "\${spike}" == "yes" ]
-    # "yes"/"no" depreciated as of Jan 2021, this option is present for backwards compatibility
-    then
-      spike="true"
-    elif [ "\${spike}" == "nan" ]
-    then
-      endsRaw="_No value_"
-      endsMeta="NA"
-    fi
-
-    # get species metadata
-    species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species)
-    echo -e "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log
-    if [ "\${species}" == "nan" ]
-    then
-      species="_No value_"
-    fi
-
-    # get read length metadata
-    readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p readLength)
-    if [ "\${readLength}" = "nan" ]
-    then
-      readLength="NA"
-    fi
-    echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
-
-    # check for an incorrect number of fastqs
-    fastqCountError=false
-    fastqCountError_details=""
-    if [ "${fastqCount}" -gt "2" ]
-    then
-      fastqCountError=true
-      fastqCountError_details="**Too many fastqs detected (>2)**"
-    elif [ "${fastqCount}" -eq "0" ]
-    then
-      fastqCountError=true
-      fastqCountError_details="**No valid fastqs detected \\(may not match {_.}R{12}.fastq.gz convention\\)**"
-    elif [ "\${endsMeta}" == "se" ] && [ "${fastqCount}" -ne "1" ]
-    then
-      fastqCountError=true
-      fastqCountError_details="**Number of fastqs detected does not match submitted endness**"
-    elif [ "\${endsMeta}" == "pe" ] && [ "${fastqCount}" -ne "2" ]
-    then
-      fastqCountError=true
-      fastqCountError_details="**Number of fastqs detected does not match submitted endness**"
-    fi
-
-    # check read counts match for fastqs
-    fastqReadError=false
-    fastqReadError_details=""
-    if [ "\${endsManual}" == "pe" ]
-    then
-      r1Count=\$(zcat ${fastq[0]} | wc -l)
-      r2Count=\$(zcat ${fastq[1]} | wc -l)
-      if [ "\${r1Count}" -ne "\${r2Count}" ]
-      then
-        fastqReadError=true
-        fastqReadError_details="**Number of reads do not match for R1 and R2:** there may be a trunkation or mismatch of fastq files"
-      fi
-    fi
-
-    # save design file
-    echo "\${endsMeta},\${endsRaw},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
-
-    # save fastq error file
-    echo "\${fastqCountError},\${fastqCountError_details},\${fastqReadError},\${fastqReadError_details}" > fastqError.csv
-    """
-}
-
-// Split metadata into separate channels
-endsMeta = Channel.create()
-endsRaw = Channel.create()
-endsManual = Channel.create()
-strandedMeta = Channel.create()
-spikeMeta = Channel.create()
-speciesMeta = Channel.create()
-readLengthMeta = Channel.create()
-expRID = Channel.create()
-studyRID = Channel.create()
-metadata_fl.splitCsv(sep: ",", header: false).separate(
-  endsMeta,
-  endsRaw,
-  endsManual,
-  strandedMeta,
-  spikeMeta,
-  speciesMeta,
-  readLengthMeta,
-  expRID,
-  studyRID
-)
-
-// Replicate metadata for multiple process inputs
-endsMeta.into {
-  endsMeta_checkMetadata
-  endsMeta_aggrQC
-  endsMeta_failExecutionRun
-}
-endsManual.into {
-  endsManual_trimData
-  endsManual_downsampleData
-  endsManual_alignSampleData
-  endsManual_aggrQC
-}
-strandedMeta.into {
-  strandedMeta_checkMetadata
-  strandedMeta_aggrQC
-  strandedMeta_failExecutionRun
-}
-spikeMeta.into {
-  spikeMeta_checkMetadata
-  spikeMeta_aggrQC
-  spikeMeta_failPreExecutionRun
-  spikeMeta_failExecutionRun
-}
-speciesMeta.into {
-  speciesMeta_checkMetadata
-  speciesMeta_aggrQC
-  speciesMeta_failPreExecutionRun
-  speciesMeta_failExecutionRun
-}
-studyRID.into {
-  studyRID_aggrQC
-  studyRID_uploadInputBag
-  studyRID_uploadProcessedFile
-  studyRID_uploadOutputBag
-}
-expRID.into {
-  expRID_aggrQC
-  expRID_uploadProcessedFile
-}
-
-// Split fastq errors into separate channels
-fastqCountError = Channel.create()
-fastqCountError_details = Channel.create()
-fastqReadError = Channel.create()
-fastqReadError_details = Channel.create()
-fastqError_fl.splitCsv(sep: ",", header: false).separate(
-  fastqCountError,
-  fastqCountError_details,
-  fastqReadError,
-  fastqReadError_details
-)
-
-//  Replicate errors for multiple process inputs
-fastqCountError.into {
-  fastqCountError_fastqc
-  fastqCountError_trimData
-  fastqCountError_getRefInfer
-  fastqCountError_downsampleData
-  fastqCountError_alignSampleData
-  fastqCountError_inferMetadata
-  fastqCountError_checkMetadata
-  fastqCountError_uploadExecutionRun
-  fastqCountError_getRef
-  fastqCountError_alignData
-  fastqCountError_dedupData
-  fastqCountError_makeBigWig
-  fastqCountError_countData
-  fastqCountError_dataQC
-  fastqCountError_aggrQC
-  fastqCountError_uploadQC
-  fastqCountError_uploadQC_fail
-  fastqCountError_uploadProcessedFile
-  fastqCountError_uploadOutputBag
-  fastqCountError_failPreExecutionRun_fastq
-}
-fastqReadError.into {
-  fastqReadError_fastqc
-  fastqReadError_trimData
-  fastqReadError_getRefInfer
-  fastqReadError_downsampleData
-  fastqReadError_alignSampleData
-  fastqReadError_inferMetadata
-  fastqReadError_checkMetadata
-  fastqReadError_uploadExecutionRun
-  fastqReadError_getRef
-  fastqReadError_alignData
-  fastqReadError_dedupData
-  fastqReadError_makeBigWig
-  fastqReadError_countData
-  fastqReadError_dataQC
-  fastqReadError_aggrQC
-  fastqReadError_uploadQC
-  fastqReadError_uploadQC_fail
-  fastqReadError_uploadProcessedFile
-  fastqReadError_uploadOutputBag
-  fastqReadError_failPreExecutionRun_fastq
-}
-
-/*
- * fastqc: run FastQC on untrimmed fastqs
-*/
-process fastqc {
-  tag "${repRID}"
-
-  input:
-    path (fastq) from fastqs_fastqc.collect()
-    val fastqCountError_fastqc
-    val fastqReadError_fastqc
-
-  output:
-    path ("*.R{1,2}.fastq.gz", includeInputs:true) into fastqs_trimData
-    path ("*_fastqc.zip") into fastqc
-    path ("rawReads.csv") into rawReadsInfer_fl
-    path "fastqFileError.csv" into fastqFileError_fl
-
-  when:
-    fastqCountError_fastqc == 'false' && fastqReadError_fastqc == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.fastqc.log
-    ulimit -a >> ${repRID}.fastqc.log
-
-    # run fastqc
-    echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log
-    fastqc *.fastq.gz -o . &> fastqc.out || true
-    fastqcErrorOut=\$(cat fastqc.out | grep -c 'Failed to process file') || fastqcErrorOut=0
-    fastqFileError=false
-    fastqFileError_details=""
-    if [ "\${fastqcErrorOut}" -ne "0" ]
-    then
-      fastqFileError=true
-      fastqFileError_details="**There is an error with the structure of the fastq**"
-      echo -e "LOG: There is an error with the structure of the fastq" >> ${repRID}.fastqc.log
-      touch dummy_fastqc.zip
-    else
-      echo -e "LOG: The structure of the fastq is correct" >> ${repRID}.fastqc.log
-    fi
-
-    # count raw reads (fastq records = lines / 4; R1 only)
-    zcat *.R1.fastq.gz | echo \$((`wc -l`/4)) > rawReads.csv
-
-    # save fastq error file
-    echo "\${fastqFileError},\${fastqFileError_details}" > fastqFileError.csv
-    """
-}
-
-// Extract number of raw reads metadata into channel
-rawReadsInfer = Channel.create()
-rawReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
-  rawReadsInfer
-)
-
-// Replicate inferred raw reads for multiple process inputs
-rawReadsInfer.into {
-  rawReadsInfer_aggrQC
-  rawReadsInfer_uploadQC
-}
-
-// Split fastq file error into separate channels
-fastqFileError = Channel.create()
-fastqFileError_details = Channel.create()
-fastqFileError_fl.splitCsv(sep: ",", header: false).separate(
-  fastqFileError,
-  fastqFileError_details
-)
-
-//  Replicate errors for multiple process inputs
-fastqFileError.into {
-  fastqFileError_fastqc
-  fastqFileError_trimData
-  fastqFileError_getRefInfer
-  fastqFileError_downsampleData
-  fastqFileError_alignSampleData
-  fastqFileError_inferMetadata
-  fastqFileError_checkMetadata
-  fastqFileError_uploadExecutionRun
-  fastqFileError_getRef
-  fastqFileError_alignData
-  fastqFileError_dedupData
-  fastqFileError_makeBigWig
-  fastqFileError_countData
-  fastqFileError_dataQC
-  fastqFileError_aggrQC
-  fastqFileError_uploadQC
-  fastqFileError_uploadQC_fail
-  fastqFileError_uploadProcessedFile
-  fastqFileError_uploadOutputBag
-  fastqFileError_failPreExecutionRun_fastqFile
-}
-
-/*
- * trimData: trims any adapter or non-host sequences from the data
-*/
-process trimData {
-  tag "${repRID}"
-
-  input:
-    path (fastq) from fastqs_trimData
-    val ends from endsManual_trimData
-    val fastqCountError_trimData
-    val fastqReadError_trimData
-    val fastqFileError_trimData
-
-  output:
-    path ("*.fq.gz") into fastqsTrim
-    path ("*_trimming_report.txt") into trimQC
-    path ("readLength.csv") into readLengthInfer_fl
-
-  when:
-    fastqCountError_trimData == "false"
-    fastqReadError_trimData == "false"
-    fastqFileError_trimData == "false"
-
-  script:
-    """
-    hostname > ${repRID}.trimData.log
-    ulimit -a >> ${repRID}.trimData.log
-
-    # trim fastq's using trim_galore and extract median read length
-    echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
-    if [ "${ends}" == "se" ]
-    then
-      trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
-      readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
-    elif [ "${ends}" == "pe" ]
-    then
-      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
-      readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
-    fi
-    echo -e "LOG: trimmed" >> ${repRID}.trimData.log
-    echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log
-
-    # save read length file
-    echo "\${readLength}" > readLength.csv
-    """
-}
-
-// Extract calculated read length metadata into channel
-readLengthInfer = Channel.create()
-readLengthInfer_fl.splitCsv(sep: ",", header: false).separate(
-  readLengthInfer
-)
-
-// Replicate inferred read length for multiple process inputs
-readLengthInfer.into {
-  readLengthInfer_aggrQC
-  readLengthInfer_uploadQC
-}
-// Replicate trimmed fastq's for multiple process inputs
-fastqsTrim.into {
-  fastqsTrim_alignData
-  fastqsTrim_downsampleData
-}
-
-// Combine inputs of getRefInfer
-getRefInferInput = referenceInfer.combine(deriva_getRefInfer.combine(script_refDataInfer.combine(fastqCountError_getRefInfer.combine(fastqReadError_getRefInfer.combine(fastqFileError_getRefInfer)))))
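-// the combined channel emits tuples of: (refName, credential, script, fastqCountError, fastqReadError, fastqFileError)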
-
-/*
- * getRefInfer: downloads the appropriate reference for metadata inference
-*/
-process getRefInfer {
-  tag "${refName}"
-
-  input:
-    tuple val (refName), path (credential, stageAs: "credential.json"), path (script_refDataInfer), val (fastqCountError), val (fastqReadError), val (fastqFileError) from getRefInferInput
-
-  output:
-    tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf")  into refInfer
-    path ("${refName}", type: 'dir') into bedInfer
-
-  when:
-    fastqCountError == "false"
-    fastqReadError == "false"
-    fastqFileError == "false"
-
-  script:
-    """
-    hostname > ${repRID}.${refName}.getRefInfer.log
-    ulimit -a >> ${repRID}.${refName}.getRefInfer.log
-
-    # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.${refName}.getRefInfer.log
-    mkdir -p ~/.deriva
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.${refName}.getRefInfer.log
-
-    # set the reference name
-    if [ "${refName}" == "ERCC" ]
-    then
-      references=\$(echo ${referenceBase}/ERCC${refERCCVersion})
-    elif [ "${refName}" == "GRCm" ]
-    then
-      references=\$(echo ${referenceBase}/GRCm${refMoVersion})
-    elif [ '${refName}' == "GRCh" ]
-    then
-      references=\$(echo ${referenceBase}/GRCh${refHuVersion})
-    else
-      echo -e "LOG: ERROR - References could not be set!\nReference found: ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
-      exit 1
-    fi
-
-    # retrieve the appropriate reference from the appropriate location
-    echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
-    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
-    then
-      unzip \${references}.zip
-      mv \$(basename \${references})/data/* .
-    elif [ params.refSource == "datahub" ]
-    then
-      GRCv=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f1)
-      GRCp=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f2)
-      GENCODE=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f3)
-      if [ "${refName}" != "ERCC" ]
-      then
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
-      else
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version=${refName}${refERCCVersion}/Annotation_Version=${refName}${refERCCVersion}')
-      fi
-      curl --request GET \${query} > refQuery.json
-      refURL=\$(python ${script_refDataInfer} --returnParam URL)
-      loc=\$(dirname \${refURL})
-      fName=\$(python ${script_refDataInfer} --returnParam fName)
-      fName=\${fName%.*}
-      if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
-      filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
-      deriva-hatrac-cli --host ${referenceBase} get \${refURL}
-      unzip \$(basename \${refURL})
-      mv \${fName}/data/* .
-    fi
-    mv ./annotation/genome.gtf .
-    mv ./sequence/genome.fna .
-    mkdir ${refName}
-    if [ "${refName}" != "ERCC" ]
-    then
-      mv ./annotation/genome.bed ./${refName}
-    fi
-    echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log
-    """
-}
-
-/*
- * downsampleData: downsample fastq's for metadata inference
- */
-process downsampleData {
-  tag "${repRID}"
-
-  input:
-    path fastq from fastqsTrim_downsampleData
-    val ends from endsManual_downsampleData
-    val fastqCountError_downsampleData
-    val fastqReadError_downsampleData
-    val fastqFileError_downsampleData
-
-  output:
-    path ("sampled.1.fq") into fastqs1Sample
-    path ("sampled.2.fq") into fastqs2Sample
-
-  when:
-    fastqCountError_downsampleData == "false"
-    fastqReadError_downsampleData == "false"
-    fastqFileError_downsampleData == "false"
-
-  script:
-    """
-    hostname > ${repRID}.downsampleData.log
-    ulimit -a >> ${repRID}.downsampleData.log
-
-    if [ "${ends}" == "se" ]
-    then
-      echo -e "LOG: downsampling SE trimmed fastq" >> ${repRID}.downsampleData.log
-      seqtk sample -s100 *trimmed.fq.gz 100000 1> sampled.1.fq
-      touch sampled.2.fq
-    elif [ "${ends}" == "pe" ]
-    then
-      echo -e "LOG: downsampling R1 of PE trimmed fastq" >> ${repRID}.downsampleData.log
-      seqtk sample -s100 *1.fq.gz 1000000 1> sampled.1.fq
-      echo -e "LOG: downsampling R2 of PE trimmed fastq" >> ${repRID}.downsampleData.log
-      seqtk sample -s100 *2.fq.gz 1000000 1> sampled.2.fq
-    fi
-    echo -e "LOG: downsampled" >> ${repRID}.downsampleData.log
-    """
-}
-
-// Replicate the downsampled fastqs and attach them to the references
-inferInput = endsManual_alignSampleData.combine(refInfer.combine(fastqs1Sample.collect().combine(fastqs2Sample.collect().combine(fastqCountError_alignSampleData.combine(fastqReadError_alignSampleData.combine(fastqFileError_alignSampleData))))))
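-// the combined channel emits tuples of: (ends, refName, hisat2, fna, gtf, fastq1, fastq2, fastqCountError, fastqReadError, fastqFileError)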
-
-/*
- * alignSampleData: aligns the downsampled reads to a reference database
-*/
-process alignSampleData {
-  tag "${ref}"
-
-  input:
-    tuple val (ends), val (ref), path (hisat2), path (fna), path (gtf), path (fastq1), path (fastq2), val (fastqCountError), val (fastqReadError), val (fastqFileError) from inferInput
-
-  output:
-    path ("${ref}.sampled.sorted.bam") into sampleBam
-    path ("${ref}.sampled.sorted.bam.bai") into sampleBai
-    path ("${ref}.alignSampleSummary.txt") into alignSampleQC
-
-  when:
-    fastqCountError == "false"
-    fastqReadError == "false"
-    fastqFileError == "false"
-
-  script:
-    """
-    hostname > ${repRID}.${ref}.alignSampleData.log
-    ulimit -a >> ${repRID}.${ref}.alignSampleData.log
-
-    # align the reads with Hisat2
-    echo -e "LOG: aligning ${ends}" >> ${repRID}.${ref}.alignSampleData.log
-    if [ "${ends}" == "se" ]
-    then
-      hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary
-    elif [ "${ends}" == "pe" ]
-    then
-      hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary
-    fi
-    echo -e "LOG: aliged" >> ${repRID}.${ref}.alignSampleData.log
-
-    # convert the output sam file to a sorted bam file using Samtools
-    echo -e "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log
-    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam
-
-    # sort the bam file using Samtools
-    echo -e "LOG: sorting the bam file" >> ${repRID}.${ref}.alignSampleData.log
-    proc=\$(expr `nproc` - 1)
-    mem=\$(vmstat -s -S K | grep 'total memory' | grep -o '[0-9]*')
-    mem=\$(expr \${mem} / \${proc} \\* 85 / 100)
-    samtools sort -@ \${proc} -m \${mem}K -O BAM -o ${ref}.sampled.sorted.bam ${ref}.sampled.bam
-
-    # index the sorted bam using Samtools
-    echo -e "LOG: indexing sorted bam file" >> ${repRID}.${ref}.alignSampleData.log
-    samtools index -@ `nproc` -b ${ref}.sampled.sorted.bam ${ref}.sampled.sorted.bam.bai
-    """
-}
-
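-// Replicate alignment sample summaries for multiple process inputs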
-alignSampleQC.into {
-  alignSampleQC_inferMetadata
-  alignSampleQC_aggrQC
-}
-
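-/*
- * inferMetadata: infers species, strandedness, spike-in, and endedness from the downsampled alignments
-*/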
-process inferMetadata {
-  tag "${repRID}"
-
-  input:
-    path script_inferMeta
-    path beds from bedInfer.collect()
-    path bam from sampleBam.collect()
-    path bai from sampleBai.collect()
-    path alignSummary from alignSampleQC_inferMetadata.collect()
-    val strandedForce
-    val spikeForce
-    val fastqCountError_inferMetadata
-    val fastqReadError_inferMetadata
-    val fastqFileError_inferMetadata
-
-  output:
-    path "infer.csv" into inferMetadata_fl
-    path "${repRID}.infer_experiment.txt" into inferExperiment
-    path "speciesError.csv" into speciesError_fl
-
-  when:
-    fastqCountError_inferMetadata == "false"
-    fastqReadError_inferMetadata == "false"
-    fastqFileError_inferMetadata == "false"
-
-  script:
-    """
-    hostname > ${repRID}.inferMetadata.log
-    ulimit -a >> ${repRID}.inferMetadata.log
-
-    # collect alignment rates (round down to integers)
-    align_ercc=\$(echo \$(grep "Overall alignment rate" ERCC.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
-    align_ercc=\$(echo \${align_ercc%.*})
-    echo -e "LOG: alignment rate to ERCC: \${align_ercc}" >> ${repRID}.inferMetadata.log
-    align_hu=\$(echo \$(grep "Overall alignment rate" GRCh.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
-    align_hu=\$(echo \${align_hu%.*})
-    echo -e "LOG: alignment rate to GRCh: \${align_hu}" >> ${repRID}.inferMetadata.log
-    align_mo=\$(echo \$(grep "Overall alignment rate" GRCm.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
-    align_mo=\$(echo \${align_mo%.*})
-    echo -e "LOG: alignment rate to GRCm: \${align_mo}" >> ${repRID}.inferMetadata.log
-
-    # determine spike-in
-    if [ 1 -eq \$(echo \$(expr \${align_ercc} ">=" 10)) ]
-    then
-      spike="true"
-    else
-      spike="false"
-    fi
-    echo -e "LOG: inference of strandedness results is: \${spike}" >> ${repRID}.inferMetadata.log
-    if [ "${spikeForce}" != "" ]
-    then
-      spike=${spikeForce}
-      echo -e "LOG: spike-in metadata forced: \${spike}" >> ${repRID}.parseMetadata.log
-    fi
-
-    speciesError=false
-    speciesError_details=""
-    # determine species
-    if [ 1 -eq \$(echo \$(expr \${align_hu} ">=" 40)) ] && [ 1 -eq \$(echo \$(expr \${align_mo} "<" 40)) ]
-    then
-      species="Homo sapiens"
-      bam="GRCh.sampled.sorted.bam"
-      bed="./GRCh/genome.bed"
-      echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
-    elif [ 1 -eq \$(echo \$(expr \${align_mo} ">=" 40)) ] && [ 1 -eq \$(echo \$(expr \${align_hu} "<" 40)) ]
-    then
-      species="Mus musculus"
-      bam="GRCm.sampled.sorted.bam"
-      bed="./GRCm/genome.bed"
-      echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
-    else
-      echo -e "LOG: ERROR - inference of species returns an ambiguous result: hu=\${align_hu} mo=\${align_mo}" >> ${repRID}.inferMetadata.log
-      if [ "${speciesForce}" == "" ]
-      then
-        speciesError=true
-        speciesError_details="**Inference of species returns an ambiguous result:** Percent aligned to human = \${align_hu} and percent aligned to mouse = \${align_mo}"
-      fi
-    fi
-    if [ "${speciesForce}" != "" ]
-    then
-      speciesError=false
-      echo -e "LOG: species overridden to: ${speciesForce}"
-      species="${speciesForce}"
-      if [ "${speciesForce}" == "Homo sapiens" ]
-      then
-        bam="GRCh.sampled.sorted.bam"
-        bed="./GRCh/genome.bed"
-      elif [ "${speciesForce}" == "Mus musculus" ]
-      then
-        bam="GRCm.sampled.sorted.bam"
-        bed="./GRCm/genome.bed"
-      fi
-    fi
-
-    if [ "\${speciesError}" == false ]
-    then
-      # infer experimental setting from the downsampled alignments
-      echo -e "LOG: inferring experimental setting from the downsampled alignments" >> ${repRID}.inferMetadata.log
-      infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.infer_experiment.txt
-      echo -e "LOG: inferred" >> ${repRID}.inferMetadata.log
-
-      ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
-      fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
-      if [ \${ended} == "PairEnd" ]
-      then
-        ends="pe"
-        percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
-        percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
-      elif [ \${ended} == "SingleEnd" ]
-      then
-        ends="se"
-        percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
-        percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
-      fi
-      echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
-      echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
-      if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
-      then
-        stranded="forward"
-      elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
-      then
-        stranded="reverse"
-      else
-        stranded="unstranded"
-      fi
-      echo -e "LOG: stradedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
-      if [ "${strandedForce}" != "" ]
-      then
-        stranded=${strandedForce}
-        echo -e "LOG: spike-in metadata forced: \${stranded}" >> ${repRID}.inferMetadata.log
-      fi
-    else
-      ends=""
-      stranded=""
-      spike=""
-      species=""
-      percentF=""
-      percentR=""
-      fail=""
-      touch ${repRID}.infer_experiment.txt
-    fi
-
-    # write inferred metadata to file
-    echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" > infer.csv
-
-    # save species error file
-    echo "\${speciesError},\${speciesError_details}" > speciesError.csv
-    """
-}
-
-// Split metadata into separate channels
-endsInfer = Channel.create()
-strandedInfer = Channel.create()
-spikeInfer = Channel.create()
-speciesInfer = Channel.create()
-align_erccInfer = Channel.create()
-align_huInfer = Channel.create()
-align_moInfer = Channel.create()
-percentFInfer = Channel.create()
-percentRInfer = Channel.create()
-failInfer = Channel.create()
-inferMetadata_fl.splitCsv(sep: ",", header: false).separate(
-  endsInfer,
-  strandedInfer,
-  spikeInfer,
-  speciesInfer,
-  align_erccInfer,
-  align_huInfer,
-  align_moInfer,
-  percentFInfer,
-  percentRInfer,
-  failInfer
-)
-
-// Replicate metadata for multiple process inputs
-endsInfer.into {
-  endsInfer_checkMetadata
-  endsInfer_alignData
-  endsInfer_countData
-  endsInfer_dataQC
-  endsInfer_aggrQC
-  endsInfer_uploadQC
-  endsInfer_failExecutionRun
-}
-strandedInfer.into {
-  strandedInfer_checkMetadata
-  strandedInfer_alignData
-  strandedInfer_countData
-  strandedInfer_aggrQC
-  strandedInfer_uploadQC
-  strandedInfer_failExecutionRun
-}
-spikeInfer.into{
-  spikeInfer_checkMetadata
-  spikeInfer_getRef
-  spikeInfer_aggrQC
-  spikeInfer_uploadExecutionRun
-  spikeInfer_failExecutionRun
-}
-speciesInfer.into {
-  speciesInfer_checkMetadata
-  speciesInfer_getRef
-  speciesInfer_aggrQC
-  speciesInfer_uploadExecutionRun
-  speciesInfer_uploadProcessedFile
-  speciesInfer_failExecutionRun
-}
-
-// Split species count error into separate channel
-speciesError = Channel.create()
-speciesError_details = Channel.create()
-speciesError_fl.splitCsv(sep: ",", header: false).separate(
-  speciesError,
-  speciesError_details
-)
-
-//  Replicate errors for multiple process inputs
-speciesError.into {
-  speciesError_checkMetadata
-  speciesError_uploadExecutionRun
-  speciesError_getRef
-  speciesError_alignData
-  speciesError_dedupData
-  speciesError_makeBigWig
-  speciesError_countData
-  speciesError_fastqc
-  speciesError_dataQC
-  speciesError_aggrQC
-  speciesError_uploadQC
-  speciesError_uploadQC_fail
-  speciesError_uploadProcessedFile
-  speciesError_uploadOutputBag
-  speciesError_failPreExecutionRun_species
-}
-
-/* 
- * checkMetadata: checks the submitted metadata against the inferred metadata
-*/
-process checkMetadata {
-  tag "${repRID}"
-
-  input:
-    val endsMeta from endsMeta_checkMetadata
-    val strandedMeta from strandedMeta_checkMetadata
-    val spikeMeta from spikeMeta_checkMetadata
-    val speciesMeta from speciesMeta_checkMetadata
-    val endsInfer from endsInfer_checkMetadata
-    val strandedInfer from strandedInfer_checkMetadata
-    val spikeInfer from spikeInfer_checkMetadata
-    val speciesInfer from speciesInfer_checkMetadata
-    val fastqCountError_checkMetadata
-    val fastqReadError_checkMetadata
-    val fastqFileError_checkMetadata
-    val speciesError_checkMetadata
-
-  output:
-    path ("check.csv") into checkMetadata_fl
-    path ("outputBagRID.csv") optional true into outputBagRID_fl_dummy
-
-  when:
-    fastqCountError_checkMetadata == "false"
-    fastqReadError_checkMetadata == "false"
-    fastqFileError_checkMetadata == "false"
-    speciesError_checkMetadata == "false"
-
-  script:
-    """
-    hostname > ${repRID}.checkMetadata.log
-    ulimit -a >> ${repRID}.checkMetadata.log
-
-    pipelineError=false
-    pipelineError_ends=false
-    pipelineError_stranded=false
-    pipelineError_spike=false
-    pipelineError_species=false
-    # check if submitted metadata matches inferred
-    if [ "${strandedMeta}" != "${strandedInfer}" ]
-    then
-      if [ "${params.strandedForce}" != "" ]
-      then
-        pipelineError=false
-        pipelineError_stranded=false
-        echo -e "LOG: stranded forced: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
-      else
-        pipelineError=true
-        pipelineError_stranded=true
-        if [ "${strandedMeta}" == "stranded" ]
-        then
-          if [[ "${strandedInfer}" == "forward" ]] || [[ "${strandedInfer}" == "reverse" ]]
-          then
-            pipelineError=false
-            pipelineError_stranded=false
-            echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
-          else
-            echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
-          fi
-        else
-          echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
-        fi
-      fi
-    else
-      pipelineError=false
-      pipelineError_stranded=false
-      echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
-    fi
-    if [ "${endsMeta}" != "${endsInfer}" ]
-    then
-      pipelineError=true
-      pipelineError_ends=true
-      echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
-    else
-      pipelineError_ends=false
-      echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
-    fi
-    if [ "${spikeMeta}" != "${spikeInfer}" ]
-    then
-      if [[ "${params.spikeForce}" != "" ]]
-      then
-        pipelineError_spike=false
-        echo -e "LOG: spike forced: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
-      else
-        pipelineError=true
-        pipelineError_spike=true
-        echo -e "LOG: spike does not match: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
-      fi
-    else
-      pipelineError_spike=false
-      echo -e "LOG: spike matches: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
-    fi
-    if [ "${speciesMeta}" != "${speciesInfer}" ]
-    then
-    if [[ "${params.speciesForce}" != "" ]]
-      then
-        pipelineError_species=false
-        echo -e "LOG: species forced: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
-      else
-        pipelineError=true
-        pipelineError_species=true
-        echo -e "LOG: species does not match: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
-      fi
-    else
-      pipelineError_species=false
-      echo -e "LOG: species matches: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
-    fi
-
-    # create a dummy output bag RID on failure
-    if [ \${pipelineError} == true ]
-    then
-      echo "fail" > outputBagRID.csv
-    fi
-
-    # write checks to file
-    echo "\${pipelineError},\${pipelineError_ends},\${pipelineError_stranded},\${pipelineError_spike},\${pipelineError_species}" > check.csv
-    """
-}
-
-// Split errors into separate channels
-pipelineError = Channel.create()
-pipelineError_ends = Channel.create()
-pipelineError_stranded = Channel.create()
-pipelineError_spike = Channel.create()
-pipelineError_species = Channel.create()
-checkMetadata_fl.splitCsv(sep: ",", header: false).separate(
-  pipelineError,
-  pipelineError_ends,
-  pipelineError_stranded,
-  pipelineError_spike,
-  pipelineError_species
-)
-
-// Replicate errors for multiple process inputs
-pipelineError.into {
-  pipelineError_getRef
-  pipelineError_alignData
-  pipelineError_dedupData
-  pipelineError_makeBigWig
-  pipelineError_countData
-  pipelineError_fastqc
-  pipelineError_dataQC
-  pipelineError_aggrQC
-  pipelineError_uploadQC
-  pipelineError_uploadQC_fail
-  pipelineError_uploadProcessedFile
-  pipelineError_uploadOutputBag
-  pipelineError_failExecutionRun
-}
-
-/* 
- * uploadInputBag: uploads the input bag
-*/
-process uploadInputBag {
-  tag "${repRID}"
-
-  input:
-    path script_uploadInputBag
-    path credential, stageAs: "credential.json" from deriva_uploadInputBag
-    path inputBag from inputBag_uploadInputBag
-    val studyRID from studyRID_uploadInputBag
-
-  output:
-    path ("inputBagRID.csv") into inputBagRID_fl
-
-  when:
-    upload
-
-  script:
-    """
-    hostname > ${repRID}.uploadInputBag.log
-    ulimit -a >> ${repRID}.uploadInputBag.log
-
-    yr=\$(date +'%Y')
-    mn=\$(date +'%m')
-    dy=\$(date +'%d')
-
-    file=\$(basename -a ${inputBag})
-    md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
-    echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
-    size=\$(wc -c < ./\${file})
-    echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log
-    
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
-    if [ "\${exist}" == "[]" ]
-    then
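-        # extract the per-host webauthn cookie from the deriva credential file
-        # (the substring strips the leading '"cookie": "' [11 chars] and the trailing quote)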
-        cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-        cookie=\${cookie:11:-1}
-
-        loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
-        inputBag_rid=\$(python3 ${script_uploadInputBag} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
-        echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
-        rid=\${inputBag_rid}
-    else
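-        # isolate '"RID":"<RID>","RCT' from the JSON response, then strip the 7-char prefix and 6-char suffix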
-        exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-        exist=\${exist:7:-6}
-        echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
-        rid=\${exist}
-    fi
-
-    echo "\${rid}" > inputBagRID.csv
-    """
-}
-
-// Extract input bag RID into channel
-inputBagRID = Channel.create()
-inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
-  inputBagRID
-)
-
-// Replicate input bag RID for multiple process inputs
-inputBagRID.into {
-  inputBagRID_uploadExecutionRun
-  inputBagRID_finalizeExecutionRun
-  inputBagRID_failPreExecutionRun
-  inputBagRID_failExecutionRun
-}
-
-/* 
- * uploadExecutionRun: uploads the execution run
-*/
-process uploadExecutionRun {
-  tag "${repRID}"
-
-  input:
-    path script_uploadExecutionRun_uploadExecutionRun
-    path credential, stageAs: "credential.json" from deriva_uploadExecutionRun
-    val spike from spikeInfer_uploadExecutionRun
-    val species from speciesInfer_uploadExecutionRun
-    val inputBagRID from inputBagRID_uploadExecutionRun
-    val fastqCountError_uploadExecutionRun
-    val fastqReadError_uploadExecutionRun
-    val fastqFileError_uploadExecutionRun
-    val speciesError_uploadExecutionRun
-    
-  output:
-    path ("executionRunRID.csv") into executionRunRID_fl
-
-  when:
-    upload
-    fastqCountError_uploadExecutionRun == "false"
-    fastqReadError_uploadExecutionRun == "false"
-    fastqFileError_uploadExecutionRun == "false"
-    speciesError_uploadExecutionRun == "false"
-
-  script:
-    """
-    hostname > ${repRID}.uploadExecutionRun.log
-    ulimit -a >> ${repRID}.uploadExecutionRun.log
-
-    echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
-    workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
-    workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    workflow=\${workflow:7:-6}
-    echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.uploadExecutionRun.log
-
-    if [ "${species}" == "Homo sapiens" ]
-    then
-      genomeName=\$(echo GRCh${refHuVersion})
-    elif [ "${species}" == "Mus musculus" ]
-    then
-      genomeName=\$(echo GRCm${refMoVersion})
-    fi
-    if [ "${spike}" == "true" ]
-    then
-      genomeName=\$(echo \${genomeName}-S)
-    fi
-    echo LOG: searching for genome name - \${genomeName} >> ${repRID}.uploadExecutionRun.log
-    genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
-    genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    genome=\${genome:7:-6}
-    echo LOG: genome RID extracted - \${genome} >> ${repRID}.uploadExecutionRun.log
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
-    echo \${exist} >> ${repRID}.uploadExecutionRun.log
-    if [ "\${exist}" == "[]" ]
-    then
-      executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u F)
-      echo LOG: execution run RID uploaded - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
-    else
-      rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-      rid=\${rid:7:-6}
-      echo \${rid} >> ${repRID}.uploadExecutionRun.log
-      executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u \${rid})
-      echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
-    fi
-
-    echo "\${executionRun_rid}" > executionRunRID.csv
-
-    if [ ${params.track} == true ]
-    then
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        '{ \
-          "ID": "${workflow.sessionId}", \
-          "ExecutionRunRID": "'\${executionRun_rid}'" \
-        }' \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
-    fi
-    """
-}
-
-// Extract execution run RID into channel
-executionRunRID = Channel.create()
-executionRunRID_fl.splitCsv(sep: ",", header: false).separate(
-  executionRunRID
-)
-
-// Replicate execution run RID for multiple process inputs
-executionRunRID.into {
-  executionRunRID_uploadQC
-  executionRunRID_uploadProcessedFile
-  executionRunRID_uploadOutputBag
-  executionRunRID_finalizeExecutionRun
-  executionRunRID_failExecutionRun
-  executionRunRID_fail
-}
-
-/*
- * getRef: downloads the appropriate reference
-*/
-process getRef {
-  tag "${species}"
-
-  input:
-    path script_refData
-    path credential, stageAs: "credential.json" from deriva_getRef
-    val spike from spikeInfer_getRef
-    val species from speciesInfer_getRef
-    val fastqCountError_getRef
-    val fastqReadError_getRef
-    val fastqFileError_getRef
-    val speciesError_getRef
-    val pipelineError_getRef
-
-  output:
-    tuple path ("hisat2", type: 'dir'), path ("*.bed"), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv")  into reference
-
-  when:
-    fastqCountError_getRef == "false"
-    fastqReadError_getRef == "false"
-    fastqFileError_getRef == "false"
-    speciesError_getRef == "false"
-    pipelineError_getRef == "false"
-
-  script:
-    """
-    hostname > ${repRID}.getRef.log
-    ulimit -a >> ${repRID}.getRef.log
-
-    # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.getRef.log
-    mkdir -p ~/.deriva
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.getRef.log
-
-    # set the reference name
-    if [ "${species}" == "Mus musculus" ]
-    then
-      reference=\$(echo ${referenceBase}/GRCm${refMoVersion})
-      refName=GRCm
-    elif [ '${species}' == "Homo sapiens" ]
-    then
-      reference=\$(echo ${referenceBase}/GRCh${refHuVersion})
-      refName=GRCh
-    else
-      echo -e "LOG: ERROR - References could not be set!\nSpecies reference found: ${species}" >> ${repRID}.getRef.log
-      exit 1
-    fi
-    if [ "${spike}" == "true" ]
-    then
-      reference=\$(echo \${reference}-S)
-    elif [ "${spike}" == "false" ]
-    then
-      reference=\$(echo \${reference})
-    fi
-    echo -e "LOG: species set to \${reference}" >> ${repRID}.getRef.log
-
-    # retrieve the appropriate reference from the appropriate location
-    echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log
-    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
-    then
-      echo -e "LOG: grabbing reference files from local (BioHPC)" >> ${repRID}.getRef.log
-      unzip \${reference}.zip
-      mv \$(basename \${reference})/data/* .
-    elif [ arams.refSource == "datahub" ]
-    then
-      echo -e "LOG: grabbing reference files from datahub" >> ${repRID}.getRef.log
-      GRCv=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f1)
-      GRCp=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f2)
-      GENCODE=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f3)
-      query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
-      curl --request GET \${query} > refQuery.json
-      refURL=\$(python ${script_refData} --returnParam URL)
-      loc=\$(dirname \${refURL})
-      fName=\$(python ${script_refData} --returnParam fName)
-      fName=\${fName%.*}
-      if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
-      filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
-      deriva-hatrac-cli --host ${referenceBase} get \${refURL}
-      unzip \$(basename \${refURL})
-      mv \${fName}/data/* .
-    fi
-    echo -e "LOG: fetched" >> ${repRID}.getRef.log
-
-    mv ./annotation/genome.gtf .
-    mv ./sequence/genome.fna .
-    mv ./annotation/genome.bed .
-    mv ./metadata/Entrez.tsv .
-    mv ./metadata/geneID.tsv .
-    """
-}
-
-// Replicate reference for multiple process inputs
-reference.into {
-  reference_alignData
-  reference_countData
-  reference_dataQC
-}
-
-/*
- * alignData: aligns the reads to a reference database
-*/
-process alignData {
-  tag "${repRID}"
-
-  input:
-    path fastq from fastqsTrim_alignData
-    path reference_alignData
-    val ends from endsInfer_alignData
-    val stranded from strandedInfer_alignData
-    val fastqCountError_alignData
-    val fastqReadError_alignData
-    val fastqFileError_alignData
-    val speciesError_alignData
-    val pipelineError_alignData
-
-  output:
-    tuple path ("${repRID}.sorted.bam"), path ("${repRID}.sorted.bam.bai") into rawBam
-    path ("*.alignSummary.txt") into alignQC
-
-  when:
-    fastqCountError_alignData == "false"
-    fastqReadError_alignData == "false"
-    fastqFileError_alignData == "false"
-    speciesError_alignData == "false"
-    pipelineError_alignData == "false"
-
-  script:
-    """
-    hostname > ${repRID}.align.log
-    ulimit -a >> ${repRID}.align.log
-
-    # set stranded param for hisat2
-    if [ "${stranded}"=="unstranded" ]
-    then
-      strandedParam=""
-    elif [ "${stranded}" == "forward" ] && [ "${ends}" == "se" ]
-    then
-        strandedParam="--rna-strandness F"
-    elif [ "${stranded}" == "forward" ] && [ "${ends}" == "pe" ]
-    then
-      strandedParam="--rna-strandness FR"
-    elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "se" ]
-    then
-        strandedParam="--rna-strandness R"
-    elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "pe" ]
-    then
-      strandedParam="--rna-strandness RF"
-    fi
-
-    # align the reads with Hisat2
-    echo -e "LOG: aligning ${ends}" >> ${repRID}.align.log
-    if [ "${ends}" == "se" ]
-    then
-      hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary
-    elif [ "${ends}" == "pe" ]
-    then
-      hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary
-    fi
-    echo -e "LOG: alignined" >> ${repRID}.align.log
-
-    # convert the output sam file to a sorted bam file using Samtools
-    echo -e "LOG: converting from sam to bam" >> ${repRID}.align.log
-    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam
-
-    # sort the bam file using Samtools
-    echo -e "LOG: sorting the bam file" >> ${repRID}.align.log
-    proc=\$(expr `nproc` - 1)
-    mem=\$(vmstat -s -S K | grep 'total memory' | grep -o '[0-9]*')
-    mem=\$(expr \${mem} / \${proc} \\* 75 / 100)
-    samtools sort -@ \${proc} -m \${mem}K -O BAM -o ${repRID}.sorted.bam ${repRID}.bam
-
-    # index the sorted bam using Samtools
-    echo -e "LOG: indexing sorted bam file" >> ${repRID}.align.log
-    samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai
-    """
-}
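-
-// Illustrative sketch only (not executed): the ladder above maps the
-// (stranded, ends) pair onto HISAT2's --rna-strandness flag; a standalone
-// bash case equivalent:
-//   case "${stranded}:${ends}" in
-//     unstranded:*) strandedParam=""                    ;;
-//     forward:se)   strandedParam="--rna-strandness F"  ;;
-//     forward:pe)   strandedParam="--rna-strandness FR" ;;
-//     reverse:se)   strandedParam="--rna-strandness R"  ;;
-//     reverse:pe)   strandedParam="--rna-strandness RF" ;;
-//   esac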
-
-// Replicate rawBam for multiple process inputs
-rawBam.set {
-  rawBam_dedupData
-}
-
-/*
- *dedupData: remove duplicate reads, specifically PCR or optical duplicates
-*/
-process dedupData {
-  tag "${repRID}"
-  publishDir "${outDir}/bam", mode: 'copy', pattern: "*.deduped.bam"
-
-  input:
-    tuple path (bam), path (bai) from rawBam_dedupData
-    val fastqCountError_dedupData
-    val fastqReadError_dedupData
-    val fastqFileError_dedupData
-    val speciesError_dedupData
-    val pipelineError_dedupData
-
-  output:
-    tuple path ("${repRID}_sorted.deduped.bam"), path ("${repRID}_sorted.deduped.bam.bai") into dedupBam
-    tuple path ("${repRID}_sorted.deduped.*.bam"), path ("${repRID}_sorted.deduped.*.bam.bai") into dedupChrBam
-    path ("*.deduped.Metrics.txt") into dedupQC
-
-  when:
-    fastqCountError_dedupData == 'false'
-    fastqReadError_dedupData == 'false'
-    fastqFileError_dedupData == 'false'
-    speciesError_dedupData == 'false'
-    pipelineError_dedupData == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.dedup.log
-    ulimit -a >> ${repRID}.dedup.log
-
-    # remove duplicated reads using Picard's MarkDuplicates
-    echo -e "LOG: deduplicating reads" >> ${repRID}.dedup.log
-    java -jar /picard/build/libs/picard.jar MarkDuplicates I=${bam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true
-    echo -e "LOG: deduplicated" >> ${repRID}.dedup.log
-
-    # sort the bam file using Samtools
-    echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log
-    samtools sort -@ `nproc` -O BAM -o ${repRID}_sorted.deduped.bam ${repRID}.deduped.bam
-
-    # index the sorted bam using Samtools
-    echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log
-    samtools index -@ `nproc` -b ${repRID}_sorted.deduped.bam ${repRID}_sorted.deduped.bam.bai
-
-    # split the deduped BAM file for multi-threaded tin calculation
-    for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | grep -o chr.[0-9]* | sort | uniq`;
-      do
-      echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}_sorted.deduped.bam \${i} 1>> ${repRID}_sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}_sorted.deduped.\${i}.bam ${repRID}_sorted.deduped.\${i}.bam.bai"
-    done | parallel -j `nproc` -k
-    """
-}
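-
-// Illustrative sketch only (not executed): the split loop above builds one
-// "view + index" command string per chromosome and runs the batch under GNU
-// parallel; a minimal standalone form, assuming in.bam is coordinate-sorted
-// and indexed:
-//   for i in chr1 chr2; do
-//     echo "samtools view -b in.bam ${i} > in.${i}.bam; samtools index in.${i}.bam"
-//   done | parallel -j 2 -k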
-
-// Replicate dedup bam/bai for multiple process inputs
-dedupBam.into {
-  dedupBam_countData
-  dedupBam_makeBigWig
-  dedupBam_dataQC
-  dedupBam_uploadProcessedFile
-}
-
-/*
- *makeBigWig: make BigWig files for output
-*/
-process makeBigWig {
-  tag "${repRID}"
-  publishDir "${outDir}/bigwig", mode: 'copy', pattern: "${repRID}.bw"
-
-  input:
-    tuple path (bam), path (bai) from dedupBam_makeBigWig
-    val fastqCountError_makeBigWig
-    val fastqReadError_makeBigWig
-    val fastqFileError_makeBigWig
-    val speciesError_makeBigWig
-    val pipelineError_makeBigWig
-
-  output:
-    path ("${repRID}_sorted.deduped.bw") into bigwig
-
-  when:
-    fastqCountError_makeBigWig == 'false'
-    fastqReadError_makeBigWig == 'false'
-    fastqFileError_makeBigWig == 'false'
-    speciesError_makeBigWig == 'false'
-    pipelineError_makeBigWig == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.makeBigWig.log
-    ulimit -a >> ${repRID}.makeBigWig.log
-
-    # create bigwig
-    echo -e "LOG: creating bibWig" >> ${repRID}.makeBigWig.log
-    bamCoverage -p `nproc` -b ${bam} -o ${repRID}_sorted.deduped.bw
-    echo -e "LOG: created" >> ${repRID}.makeBigWig.log
-    """
-}
-
-/*
- *countData: count features and calculate TPM
-*/
-process countData {
-  tag "${repRID}"
-  publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*_tpmTable.csv"
-
-  input:
-    path script_calculateTPM
-    path script_convertGeneSymbols
-    tuple path (bam), path (bai) from dedupBam_countData
-    path ref from reference_countData
-    val ends from endsInfer_countData
-    val stranded from strandedInfer_countData
-    val fastqCountError_countData
-    val fastqReadError_countData
-    val fastqFileError_countData
-    val speciesError_countData
-    val pipelineError_countData
-
-  output:
-    path ("*_tpmTable.csv") into counts
-    path ("*_countData.summary") into countsQC
-    path ("assignedReads.csv") into assignedReadsInfer_fl
-
-  when:
-    fastqCountError_countData == 'false'
-    fastqReadError_countData == 'false'
-    fastqFileError_countData == 'false'
-    speciesError_countData == 'false'
-    pipelineError_countData == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.countData.log
-    ulimit -a >> ${repRID}.countData.log
-
-    # determine strandedness and set up stranding for countData
-    stranding=0
-    if [ "${stranded}" == "unstranded" ]
-    then
-      stranding=0
-      echo -e "LOG: strandedness set to unstranded [0]" >> ${repRID}.countData.log
-    elif [ "${stranded}" == "forward" ]
-    then
-      stranding=1
-      echo -e "LOG: strandedness set to forward stranded [1]" >> ${repRID}.countData.log
-    elif [ "${stranded}" == "reverse" ]
-    then
-      stranding=2
-      echo -e "LOG: strandedness set to reverse stranded [2]" >> ${repRID}.countData.log
-    fi
-
-    # run featureCounts
-    echo -e "LOG: counting ${ends} features" >> ${repRID}.countData.log
-    if [ "${ends}" == "se" ]
-    then
-      featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam
-    elif [ "${ends}" == "pe" ]
-    then
-      featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam
-    fi
-    echo -e "LOG: counted" >> ${repRID}.countData.log
-
-    # extract assigned reads
-    grep -m 1 'Assigned' *_countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv
-
-    # calculate TPM from the resulting countData table
-    echo -e "LOG: calculating TPM with R" >> ${repRID}.countData.log
-    Rscript ${script_calculateTPM} --count "${repRID}_countData"
-
-    # convert gene symbols to Entrez id's
-    echo -e "LOG: convert gene symbols to Entrez id's" >> ${repRID}.countData.log
-    Rscript ${script_convertGeneSymbols} --repRID "${repRID}"
-    """
-}
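-
-// Illustrative sketch only (not executed): the TPM R script is not shown here,
-// but TPM itself is length-normalized counts rescaled per million:
-// TPM_g = 1e6 * (count_g / length_g) / sum_over_genes(count / length).
-// Assuming a hypothetical headered table counts.tsv (gene, count, length_bp):
-//   awk 'NR > 1 { rpk[$1] = $2 / $3; total += $2 / $3 }
-//        END { for (g in rpk) printf "%s\t%.3f\n", g, 1e6 * rpk[g] / total }' counts.tsv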
-
-// Extract number of assigned reads metadata into channel
-assignedReadsInfer = Channel.create()
-assignedReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
-  assignedReadsInfer
-)
-
-// Replicate inferred assigned reads for multiple process inputs
-assignedReadsInfer.into {
-  assignedReadsInfer_aggrQC
-  assignedReadsInfer_uploadQC
-}
-
-/*
- *dataQC: calculate transcript integrity numbers (TIN) and bin them, and calculate the inner distance of PE replicates
-*/
-process dataQC {
-  tag "${repRID}"
-
-  input:
-    path script_tinHist
-    path ref from reference_dataQC
-    tuple path (bam), path (bai) from dedupBam_dataQC
-    tuple path (chrBam), path (chrBai) from dedupChrBam
-    val ends from endsInfer_dataQC
-    val fastqCountError_dataQC
-    val fastqReadError_dataQC
-    val fastqFileError_dataQC
-    val speciesError_dataQC
-    val pipelineError_dataQC
-
-  output:
-    path "${repRID}_tin.hist.tsv" into tinHist
-    path "${repRID}_tin.med.csv" into  tinMedInfer_fl
-    path "${repRID}_insertSize.inner_distance_freq.txt" into innerDistance
-
-  when:
-    fastqCountError_dataQC == 'false'
-    fastqReadError_dataQC == 'false'
-    fastqFileError_dataQC == 'false'
-    speciesError_dataQC == 'false'
-    pipelineError_dataQC == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.dataQC.log
-    ulimit -a >> ${repRID}.dataQC.log
-
-    # calculate TIN values per feature on each chromosome
-    echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}_sorted.deduped.tin.xls
-    for i in `cat ./genome.bed | cut -f1 | grep -o chr.[0-9]* | sort | uniq`; do
-      echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}_sorted.deduped.\${i}.bam  -r ./genome.bed; cat ${repRID}_sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
-    done | parallel -j `nproc` -k 1>> ${repRID}_sorted.deduped.tin.xls
-
-    # bin TIN values
-    echo -e "LOG: binning TINs" >> ${repRID}.dataQC.log
-    python3 ${script_tinHist} -r ${repRID}
-    echo -e "LOG: binned" >> ${repRID}.dataQC.log
-
-    # calculate inner-distances for PE data
-    if [ "${ends}" == "pe" ]
-    then
-      echo -e "LOG: calculating inner distances for ${ends}" >> ${repRID}.dataQC.log
-      inner_distance.py -i "${bam}" -o ${repRID}_insertSize -r ./genome.bed
-      echo -e "LOG: calculated" >> ${repRID}.dataQC.log
-    elif [ "${ends}" == "se" ]
-    then
-      echo -e "LOG: creating dummy inner distance file for ${ends}" >> ${repRID}.dataQC.log
-      touch ${repRID}_insertSize.inner_distance_freq.txt
-    fi
-    """
-}
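-
-// Illustrative sketch only (not executed): each per-chromosome tin.py run
-// writes its own .tin.xls with its own header, so the loop above keeps one
-// shared header line and filters each file to the rows for that chromosome
-// before concatenating; roughly, for a single chromosome:
-//   echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > merged.tin.xls
-//   grep -P "\tchr1\t" sample.chr1.tin.xls >> merged.tin.xls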
-
-// Extract median TIN metadata into channel
-tinMedInfer = Channel.create()
-tinMedInfer_fl.splitCsv(sep: ",", header: false).separate(
-  tinMedInfer
-)
-
-// Replicate inferred median TIN for multiple process inputs
-tinMedInfer.into {
-  tinMedInfer_aggrQC
-  tinMedInfer_uploadQC
-}
-
-/*
- *aggrQC: aggregate QC from processes as well as metadata and run MultiQC
-*/
-process aggrQC {
-  tag "${repRID}"
-  publishDir "${outDir}/report", mode: 'copy', pattern: "${repRID}.multiqc.html"
-  publishDir "${outDir}/qc", mode: 'copy', pattern: "${repRID}.multiqc_data.json"
-
-  input:
-    path multiqcConfig
-    path bicfLogo
-    path softwareReferences
-    path softwareVersions
-    path fastqc
-    path trimQC
-    path alignQC
-    path dedupQC
-    path countsQC
-    path innerDistance
-    path tinHist
-    path alignSampleQCs from alignSampleQC_aggrQC.collect()
-    path inferExperiment
-    val endsManual from endsManual_aggrQC
-    val endsM from endsMeta_aggrQC
-    val strandedM from strandedMeta_aggrQC
-    val spikeM from spikeMeta_aggrQC
-    val speciesM from speciesMeta_aggrQC
-    val endsI from endsInfer_aggrQC
-    val strandedI from strandedInfer_aggrQC
-    val spikeI from spikeInfer_aggrQC
-    val speciesI from speciesInfer_aggrQC
-    val readLengthM from readLengthMeta
-    val readLengthI from readLengthInfer_aggrQC
-    val rawReadsI from rawReadsInfer_aggrQC
-    val assignedReadsI from assignedReadsInfer_aggrQC
-    val tinMedI from tinMedInfer_aggrQC
-    val studyRID from studyRID_aggrQC
-    val expRID from expRID_aggrQC
-    val fastqCountError_aggrQC
-    val fastqReadError_aggrQC
-    val fastqFileError_aggrQC
-    val speciesError_aggrQC
-    val pipelineError_aggrQC
-
-  output:
-    path "${repRID}.multiqc.html" into multiqc
-    path "${repRID}.multiqc_data.json" into multiqcJSON
-
-  when:
-    fastqCountError_aggrQC == 'false'
-    fastqReadError_aggrQC == 'false'
-    fastqFileError_aggrQC == 'false'
-    speciesError_aggrQC == 'false'
-    pipelineError_aggrQC == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.aggrQC.log
-    ulimit -a >> ${repRID}.aggrQC.log
-
-    # make run table
-    if [ "${params.inputBagForce}" == "" ] && [ "${params.fastqsForce}" == "" ] && [ "${params.speciesForce}" == "" ] && [ "${params.strandedForce}" == "" ] && [ "${params.spikeForce}" == "" ]
-    then
-      input="default"
-    else
-      input="override:"
-      if [ "${params.inputBagForce}" != "" ]
-      then
-        input=\$(echo \${input} inputBag)
-      fi
-      if [ "${params.fastqsForce}" != "" ]
-      then
-        input=\$(echo \${input} fastq)
-      fi
-      if [ "${params.speciesForce}" != "" ]
-      then
-        input=\$(echo \${input} species)
-      fi
-      if [ "${params.strandedForce}" != "" ]
-      then
-        input=\$(echo \${input} stranded)
-      fi
-      if [ "${params.spikeForce}" != "" ]
-      then
-        input=\$(echo \${input} spike)
-      fi
-    fi
-    echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log
-    echo -e "Session\tSession ID\tStart Time\tPipeline Version\tInput" > run.tsv
-    echo -e "Session\t${workflow.sessionId}\t${workflow.start}\t${workflow.manifest.version}\t\${input}" >> run.tsv
-
-
-    # make RID table
-    echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log
-    echo -e "Replicate\tReplicate RID\tExperiment RID\tStudy RID" > rid.tsv
-    echo -e "Replicate\t${repRID}\t${expRID}\t${studyRID}" >> rid.tsv
-
-    # make metadata table
-    echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
-    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRaw Reads\tAssigned Reads\tMedian Read Length\tMedian TIN" > metadata.tsv
-    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t'${readLengthM}'\t-" >> metadata.tsv
-    if [ "${params.speciesForce}" == "" ]
-    then
-      input=\$(echo "Inferred\\t${speciesI}\\t")
-    else
-      input=\$(echo "Inferred\\t${speciesI} (FORCED)\\t")
-    fi
-    input=\$(echo \${input}"${endsI}\\t")
-    if [ "${params.strandedForce}" == "" ]
-    then
-      input=\$(echo \${input}"${strandedI}\\t")
-    else
-      input=\$(echo \${input}"${strandedI} (FORCED)\\t")
-    fi
-    if [ "${params.spikeForce}" == "" ]
-    then
-      input=\$(echo \${input}"${spikeI}\\t-\\t-\\t-\\t-")
-    else
-      input=\$(echo \${input}"${spikeI} (FORCED)\\t-\\t-\\t-\\t-")
-    fi
-    echo -e \${input} >> metadata.tsv
-    echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv
-
-    # make reference table
-    echo -e "LOG: creating referencerun table" >> ${repRID}.aggrQC.log
-    echo -e "Species\tGenome Reference Consortium Build\tGenome Reference Consortium Patch\tGENCODE Annotation Release" > reference.tsv
-    echo -e "Human\tGRCh\$(echo `echo ${params.refHuVersion} | cut -d "." -f 1`)\t\$(echo `echo ${params.refHuVersion} | cut -d "." -f 2`)\t'\$(echo `echo ${params.refHuVersion} | cut -d "." -f 3 | sed "s/^v//"`)'" >> reference.tsv
-    echo -e "Mouse\tGRCm\$(echo `echo ${params.refMoVersion} | cut -d "." -f 1`)\t\$(echo `echo ${params.refMoVersion} | cut -d "." -f 2`)\t'\$(echo `echo ${params.refMoVersion} | cut -d "." -f 3 | sed "s/^v//"`)'" >> reference.tsv
-
-    # remove inner distance report if it is empty (SE repRID)
-    echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
-    if [ "${endsM}" == "se" ]
-    then
-      rm -f ${innerDistance}
-    fi
-
-    # run MultiQC
-    echo -e "LOG: running multiqc" >> ${repRID}.aggrQC.log
-    multiqc -c ${multiqcConfig} . -n ${repRID}.multiqc.html
-    cp ${repRID}.multiqc_data/multiqc_data.json ${repRID}.multiqc_data.json
-
-    if [ ${params.track} == true ]
-    then
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        @./${repRID}.multiqc_data.json \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/qc"
-    fi
-    """
-}
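-
-// Illustrative sketch only: run.tsv, rid.tsv, metadata.tsv, and reference.tsv
-// are small custom tables (one header row plus data rows) that the MultiQC
-// config presumably pulls into the report; with hypothetical values,
-// metadata.tsv renders roughly as:
-//   Source     Species       Ends  Stranded  Spike-in  Raw Reads   Assigned Reads  Median Read Length  Median TIN
-//   Submitter  Homo sapiens  pe    forward   false     -           -               '75'                -
-//   Inferred   Homo sapiens  pe    forward   false     -           -               -                   -
-//   Measured   -             pe    -         -         '25000000'  '20000000'      '75'                '82.5'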
-
-/* 
- * uploadQC: uploads the mRNA QC
-*/
-process uploadQC {
-  tag "${repRID}"
-
-  input:
-    path script_deleteEntry_uploadQC
-    path script_uploadQC
-    path credential, stageAs: "credential.json" from deriva_uploadQC
-    val executionRunRID from executionRunRID_uploadQC
-    val ends from endsInfer_uploadQC
-    val stranded from strandedInfer_uploadQC
-    val length from readLengthInfer_uploadQC
-    val rawCount from rawReadsInfer_uploadQC
-    val finalCount from assignedReadsInfer_uploadQC
-    val tinMed from tinMedInfer_uploadQC
-    val fastqCountError_uploadQC
-    val fastqReadError_uploadQC
-    val fastqFileError_uploadQC
-    val speciesError_uploadQC
-    val pipelineError_uploadQC
-
-  output:
-    path ("qcRID.csv") into qcRID_fl
-
-  when:
-    upload
-    fastqCountError_uploadQC == 'false'
-    fastqReadError_uploadQC == 'false'
-    fastqFileError_uploadQC == 'false'
-    speciesError_uploadQC == 'false'
-    pipelineError_uploadQC == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.uploadQC.log
-    ulimit -a >> ${repRID}.uploadQC.log
-
-    if [ "${ends}" == "pe" ]
-    then
-      end="Paired End"
-    elif [ "${ends}" == "se" ]
-    then
-      end="Single End"
-    fi
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=${repRID})
-    if [ "\${exist}" != "[]" ]
-    then
-      rids=\$(echo \${exist} | grep -o '\\"RID\\":\\".\\{7\\}' | sed 's/^.\\{7\\}//')
-      for rid in \${rids}
-      do
-        python3 ${script_deleteEntry_uploadQC} -r \${rid} -t mRNA_QC -o ${source} -c \${cookie}
-        echo LOG: old mRNA QC RID deleted - \${rid} >> ${repRID}.uploadQC.log
-      done
-      echo LOG: all old mRNA QC RIDs deleted >> ${repRID}.uploadQC.log
-    fi
-
-    qc_rid=\$(python3 ${script_uploadQC} -r ${repRID} -e ${executionRunRID} -p "\${end}" -s ${stranded} -l ${length} -w ${rawCount} -f ${finalCount} -t ${tinMed} -o ${source} -c \${cookie} -u F)
-    echo LOG: mRNA QC RID uploaded - \${qc_rid} >> ${repRID}.uploadQC.log
-
-    echo "\${qc_rid}" > qcRID.csv
-    """
-}
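-
-// Illustrative sketch only (not executed): the cookie extraction above pulls
-// the "cookie" value for ${source} out of a deriva credential.json and strips
-// the 11-character '"cookie": "' prefix and the trailing quote. Assuming a
-// pretty-printed file whose host entry contains "cookie": "webauthn=XYZ":
-//   cookie=$(cat credential.json | grep -A 1 '"staging.gudmap.org": {' | grep -o '"cookie": ".*"')
-//   cookie=${cookie:11:-1}    # -> webauthn=XYZ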
-
-/*
- *uploadProcessedFile: uploads the processed files
-*/
-process uploadProcessedFile {
-  tag "${repRID}"
-  publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip"
-
-  input:
-    path script_deleteEntry_uploadProcessedFile
-    path credential, stageAs: "credential.json" from deriva_uploadProcessedFile
-    path executionRunExportConfig
-    path multiqc
-    path multiqcJSON
-    tuple path (bam),path (bai) from dedupBam_uploadProcessedFile
-    path bigwig
-    path counts
-    val species from speciesInfer_uploadProcessedFile
-    val studyRID from studyRID_uploadProcessedFile
-    val expRID from expRID_uploadProcessedFile
-    val executionRunRID from executionRunRID_uploadProcessedFile
-    val fastqCountError_uploadProcessedFile
-    val fastqReadError_uploadProcessedFile
-    val fastqFileError_uploadProcessedFile
-    val speciesError_uploadProcessedFile
-    val pipelineError_uploadProcessedFile
-
-  output:
-    path ("${repRID}_Output_Bag.zip") into outputBag
-
-  when:
-    upload
-    fastqCountError_uploadProcessedFile == 'false'
-    fastqReadError_uploadProcessedFile == 'false'
-    fastqFileError_uploadProcessedFile == 'false'
-    speciesError_uploadProcessedFile == 'false'
-    pipelineError_uploadProcessedFile == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.outputBag.log
-    ulimit -a >> ${repRID}.outputBag.log
-
-    mkdir -p ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
-    cp ${bam} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
-    cp ${bai} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
-    cp ${bigwig} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
-    cp ${counts} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=${repRID})
-    if [ "\${exist}" != "[]" ]
-    then
-      rids=\$(echo \${exist} | grep -o '\\"RID\\":\\".\\{7\\}' | sed 's/^.\\{7\\}//')
-      for rid in \${rids}
-      do
-        python3 ${script_deleteEntry_uploadProcessedFile} -r \${rid} -t Processed_File -o ${source} -c \${cookie}
-      done
-      echo LOG: all old processed file RIDs deleted >> ${repRID}.outputBag.log
-    fi
-
-    deriva-upload-cli --catalog 2 --token \${cookie:9} ${source} ./deriva
-    echo LOG: processed files uploaded >> ${repRID}.outputBag.log
-
-    deriva-download-cli --catalog 2 --token \${cookie:9} ${source} ${executionRunExportConfig} . rid=${executionRunRID}
-    echo LOG: execution run bag downloaded >> ${repRID}.outputBag.log
-
-    echo -e "### Run Details" >> runDetails.md
-    echo -e "**Workflow URL:** https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq" >> runDetails.md
-    echo -e "**Workflow Version:** ${workflow.manifest.version}" >> runDetails.md
-    echo -e "**Description:** ${workflow.manifest.description}" >> runDetails.md
-    if [ "${species}" == "Mus musculus" ]; then
-      genome=\$(echo GRCm${refMoVersion} | cut -d '.' -f1)
-      patch=\$(echo ${refMoVersion} | cut -d '.' -f2)
-      annotation=\$(echo ${refMoVersion} | cut -d '.' -f3 | tr -d 'v')
-    elif [ "${species}" == "Homo sapiens" ]; then
-      genome=\$(echo GRCh${refHuVersion} | cut -d '.' -f1)
-      patch=\$(echo ${refHuVersion} | cut -d '.' -f2)
-      annotation=\$(echo ${refHuVersion} | cut -d '.' -f3 | tr -d 'v')
-    fi
-    echo -e "**Genome Assembly Version:** \${genome} patch \${patch}" >> runDetails.md
-    echo -e "**Annotation Version:** GENCODE release \${annotation}" >> runDetails.md
-    echo -e "**Run ID:** ${repRID}" >> runDetails.md
-    echo LOG: runDetails.md created >> ${repRID}.outputBag.log
-
-    unzip Execution_Run_${executionRunRID}.zip
-    mv Execution_Run_${executionRunRID} ${repRID}_Output_Bag
-    loc=./${repRID}_Output_Bag/data/assets/Study/${studyRID}/Experiment/${expRID}/Replicate/${repRID}/Execution_Run/${executionRunRID}/Output_Files/
-    mkdir -p \${loc}
-    cp runDetails.md \${loc}
-    cp ${multiqc} \${loc}
-    cp ${multiqcJSON} \${loc}
-
-    bdbag ./${repRID}_Output_Bag/ --update --archiver zip --debug
-    echo LOG: output bag created >> ${repRID}.outputBag.log
-    """
-}
-
-/* 
- * uploadOutputBag: uploads the output bag
-*/
-process uploadOutputBag {
-  tag "${repRID}"
-
-  input:
-    path script_uploadOutputBag
-    path credential, stageAs: "credential.json" from deriva_uploadOutputBag
-    path outputBag
-    val studyRID from studyRID_uploadOutputBag
-    val executionRunRID from executionRunRID_uploadOutputBag
-    val fastqCountError_uploadOutputBag
-    val fastqReadError_uploadOutputBag
-    val fastqFileError_uploadOutputBag
-    val speciesError_uploadOutputBag
-    val pipelineError_uploadOutputBag
-
-  output:
-    path ("outputBagRID.csv") into outputBagRID_fl
-
-  when:
-    upload
-    fastqCountError_uploadOutputBag == 'false'
-    fastqReadError_uploadOutputBag == 'false'
-    fastqFileError_uploadOutputBag == 'false'
-    speciesError_uploadOutputBag == 'false'
-    pipelineError_uploadOutputBag == 'false'
-
-  script:
-    """
-    hostname > ${repRID}.uploadOutputBag.log
-    ulimit -a >> ${repRID}.uploadOutputBag.log
-
-    file=\$(basename -a ${outputBag})
-    md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
-    echo LOG: ${repRID} output bag md5 sum - \${md5} >> ${repRID}.uploadOutputBag.log
-    size=\$(wc -c < ./\${file})
-    echo LOG: ${repRID} output bag size - \${size} bytes >> ${repRID}.uploadOutputBag.log
-    
-    loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/output_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
-    echo LOG: output bag uploaded - \${loc} >> ${repRID}.uploadOutputBag.log
-    # url-ify the location
-    loc=\${loc//\\//%2F}
-    loc=\${loc//:/%3A}
-    loc=\${loc// /@20}
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_URL=\${loc})
-    if [ "\${exist}" == "[]" ]
-    then
-      outputBag_rid=\$(python3 ${script_uploadOutputBag} -e ${executionRunRID} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie} -u F)
-      echo LOG: output bag RID uploaded - \${outputBag_rid} >> ${repRID}.uploadOutputBag.log
-      rid=\${outputBag_rid}
-    else
-      exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-      exist=\${exist:8:-6}
-      outputBag_rid=\$(python3 ${script_uploadOutputBag} -e ${executionRunRID} -o ${source} -c \${cookie} -u \${exist})
-      echo LOG: output bag RID already exists - \${exist} >> ${repRID}.uploadOutputBag.log
-      rid=\${exist}
-    fi
-
-    echo "\${rid}" > outputBagRID.csv
-    """
-}
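-
-// Illustrative sketch only (not executed): the "url-ify" step above is bash
-// pattern substitution that escapes the hatrac location before it is used in
-// the ERMrest File_URL predicate; with a hypothetical location:
-//   loc="/hatrac/resources/my bag.zip:v1"
-//   loc=${loc//\//%2F}    # slashes -> %2F
-//   loc=${loc//:/%3A}     # colons  -> %3A
-//   loc=${loc// /@20}     # spaces  -> @20 (escape used by this pipeline)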
-
-// Extract output bag RID into channel
-outputBagRID = Channel.create()
-outputBagRID_fl.splitCsv(sep: ",", header: false).separate(
-  outputBagRID
-)
-
-/* 
- * finalizeExecutionRun: finalizes the execution run
-*/
-process finalizeExecutionRun {
-  tag "${repRID}"
-
-  input:
-    path script_uploadExecutionRun_finalizeExecutionRun
-    path credential, stageAs: "credential.json" from deriva_finalizeExecutionRun
-    val executionRunRID from executionRunRID_finalizeExecutionRun
-    val inputBagRID from inputBagRID_finalizeExecutionRun
-    val outputBagRID
-
-  when:
-    upload
-
-  script:
-    """
-    hostname > ${repRID}.finalizeExecutionRun.log
-    ulimit -a >> ${repRID}.finalizeExecutionRun.log
-
-    executionRun=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/RID=${executionRunRID})
-    workflow=\$(echo \${executionRun} | grep -o '\\"Workflow\\":.*\\"Reference' | grep -oP '(?<=\\"Workflow\\":\\").*(?=\\",\\"Reference)')
-    genome=\$(echo \${executionRun} | grep -o '\\"Reference_Genome\\":.*\\"Input_Bag' | grep -oP '(?<=\\"Reference_Genome\\":\\").*(?=\\",\\"Input_Bag)')
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    rid=\$(python3 ${script_uploadExecutionRun_finalizeExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Success -d 'Run Successful' -o ${source} -c \${cookie} -u ${executionRunRID})
-    echo LOG: execution run RID marked as successful - \${rid} >> ${repRID}.finalizeExecutionRun.log
-
-    if [ ${params.track} == true ]
-    then
-      dt=`date +%FT%T.%3N%:z`
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        '{ \
-          "ID": "${workflow.sessionId}", \
-          "Complete": "'\${dt}'" \
-        }' \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
-    fi
-    """
-}
-
-// Combine errors
-error_meta = fastqCountError_uploadQC_fail.ifEmpty(false).combine(fastqReadError_uploadQC_fail.ifEmpty(false).combine(fastqFileError_uploadQC_fail.ifEmpty(false).combine(speciesError_uploadQC_fail.ifEmpty(false).combine(pipelineError_uploadQC_fail.ifEmpty(false)))))
-error_meta.into {
-  error_failPreExecutionRun
-  error_uploadQC_fail
-}
-errorDetails = fastqCountError_details.ifEmpty("").combine(fastqReadError_details.ifEmpty("").combine(fastqFileError_details.ifEmpty("").combine(speciesError_details.ifEmpty(""))))
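-
-// Illustrative sketch only: ifEmpty(false) injects a default when an error
-// channel emitted nothing, and the nested combine calls flatten the flags
-// into a single tuple; a minimal two-level analogue:
-//   Channel.from('true')
-//     .combine(Channel.from('false').combine(Channel.from('false')))
-//     .subscribe { println it }    // -> [true, false, false]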
-
-/* 
- * failPreExecutionRun: fail the execution run prematurely for fastq or species errors
-*/
-process failPreExecutionRun {
-  tag "${repRID}"
-
-  input:
-    path script_uploadExecutionRun from script_uploadExecutionRun_failPreExecutionRun
-    path credential, stageAs: "credential.json" from deriva_failPreExecutionRun
-    val spike from spikeMeta_failPreExecutionRun
-    val species from speciesMeta_failPreExecutionRun
-    val inputBagRID from inputBagRID_failPreExecutionRun
-    tuple val (fastqCountError), val (fastqReadError), val (fastqFileError), val (speciesError), val (pipelineError) from error_failPreExecutionRun
-    tuple val (fastqCountError_details), val (fastqReadError_details), val (fastqFileError_details), val (speciesError_details) from errorDetails
-
-  output:
-    path ("executionRunRID.csv") into executionRunRID_preFail_fl
-
-  when:
-    upload
-    fastqCountError == 'true' || fastqReadError == 'true' || fastqFileError == 'true' || speciesError == 'true'
-
-  script:
-    """
-    hostname > ${repRID}.failPreExecutionRun.log
-    ulimit -a >> ${repRID}.failPreExecutionRun.log
-
-    errorDetails=""
-    if [ ${fastqCountError} == true ]
-    then
-      errorDetails=\$(echo ${fastqCountError_details}"\\n")
-    elif [ ${fastqReadError} == true ]
-    then
-      errorDetails=\$(echo \$(errorDetails)${fastqReadError_details}"\\n")
-    elif [ ${fastqFileError} == true ]
-    then
-      errorDetails=\$(echo \$(errorDetails)${fastqFileError_details}"\\n")
-    elif [ ${speciesError} == true ]
-    then
-      errorDetails=\$(echo \$(errorDetails)${speciesError_details}"\\n")
-    fi
-
-    echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.failPreExecutionRun.log
-    workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
-    workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    workflow=\${workflow:7:-6}
-    echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.failPreExecutionRun.log
-
-    if [ "${species}" == "Homo sapiens" ]
-    then
-      genomeName=\$(echo GRCh${refHuVersion})
-    elif [ "${species}" == "Mus musculus" ]
-    then
-      genomeName=\$(echo GRCm${refMoVersion})
-    fi
-    if [ "${spike}" == "true" ]
-    then
-      genomeName=\$(echo \${genomeName}-S)
-    fi
-    echo LOG: searching for genome name - \${genomeName} >> ${repRID}.failPreExecutionRun.log
-    genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
-    genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    genome=\${genome:7:-6}
-    echo LOG: genome RID extracted - \${genome} >> ${repRID}.failPreExecutionRun.log
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
-    echo \${exist} >> ${repRID}.failPreExecutionRun.log
-    if [ "\${exist}" == "[]" ]
-    then
-      rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u F)
-      echo LOG: execution run RID uploaded - \${rid} >> ${repRID}.failPreExecutionRun.log
-    else
-      rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-      rid=\${rid:7:-6}
-      echo \${rid} >> ${repRID}.failPreExecutionRun.log
-      executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u \${rid})
-      echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.failPreExecutionRun.log
-    fi
-
-    echo "\${rid}" > executionRunRID.csv
-
-    if [ ${params.track} == true ]
-    then
-      dt=`date +%FT%T.%3N%:z`
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        '{ \
-          "ID": "${workflow.sessionId}", \
-          "ExecutionRunRID": "'\${rid}'", \
-          "Failure": "'\${dt}'" \
-        }' \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
-    fi
-  """
-}
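-
-// Illustrative sketch only (not executed): the RID extraction above slices
-// fixed-width delimiters off the grep match; with a hypothetical payload:
-//   workflow='"RID":"1-ABCD","RCT'    # what grep -o returns
-//   workflow=${workflow:7:-6}         # drop '"RID":"' (7 chars) and '","RCT' (6) -> 1-ABCD
-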
-// Extract execution run RID into channel
-executionRunRID_preFail = Channel.create()
-executionRunRID_preFail_fl.splitCsv(sep: ",", header: false).separate(
-  executionRunRID_preFail
-)
-
-failExecutionRunRID = executionRunRID_fail.ifEmpty('').mix(executionRunRID_preFail.ifEmpty('')).filter { it != "" }
-
-/* 
- * failExecutionRun: fail the execution run
-*/
-process failExecutionRun {
-  tag "${repRID}"
-
-  input:
-    path script_uploadExecutionRun_failExecutionRun
-    path credential, stageAs: "credential.json" from deriva_failExecutionRun
-    val executionRunRID from executionRunRID_failExecutionRun
-    val inputBagRID from inputBagRID_failExecutionRun
-    val endsMeta from endsMeta_failExecutionRun
-    val endsRaw
-    val strandedMeta from strandedMeta_failExecutionRun
-    val spikeMeta from spikeMeta_failExecutionRun
-    val speciesMeta from speciesMeta_failExecutionRun
-    val endsInfer from endsInfer_failExecutionRun
-    val strandedInfer from strandedInfer_failExecutionRun
-    val spikeInfer from spikeInfer_failExecutionRun
-    val speciesInfer from speciesInfer_failExecutionRun
-    val pipelineError from pipelineError_failExecutionRun
-    val pipelineError_ends
-    val pipelineError_stranded
-    val pipelineError_spike
-    val pipelineError_species
-
-  when:
-    upload
-    pipelineError == 'true'
-
-  script:
-    """
-    hostname > ${repRID}.failExecutionRun.log
-    ulimit -a >> ${repRID}.failExecutionRun.log
-
-    executionRun=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/RID=${executionRunRID})
-    workflow=\$(echo \${executionRun} | grep -o '\\"Workflow\\":.*\\"Reference' | grep -oP '(?<=\\"Workflow\\":\\").*(?=\\",\\"Reference)')
-    genome=\$(echo \${executionRun} | grep -o '\\"Reference_Genome\\":.*\\"Input_Bag' | grep -oP '(?<=\\"Reference_Genome\\":\\").*(?=\\",\\"Input_Bag)')
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    errorDetails=""
-    if [ ${pipelineError} == false ]
-    then
-      rid=\$(python3 ${script_uploadExecutionRun_failExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Success -d 'Run Successful' -o ${source} -c \${cookie} -u ${executionRunRID})
-      echo LOG: execution run RID marked as successful - \${rid} >> ${repRID}.failExecutionRun.log
-    else
-      pipelineError_details=\$(echo "**Submitted metadata does not match inferred:**\\n")
-      pipelineError_details=\$(echo \${pipelineError_details}"|Metadata|Submitted value|Inferred value|\\n")
-      pipelineError_details=\$(echo \${pipelineError_details}"|:-:|-:|-:|\\n")
-      if ${pipelineError_ends}
-      then
-        if [ "${endsInfer}" == "se" ]
-        then
-          endInfer="Single End"
-        elif [ "${endsInfer}" == "pe" ]
-        then
-          endInfer="Paired End"
-        else
-          endInfer="unknown"
-        fi
-        pipelineError_details=\$(echo \${pipelineError_details}"|Paired End|${endsRaw}|"\${endInfer}"|\\n")
-      fi
-      if ${pipelineError_stranded}
-      then
-        pipelineError_details=\$(echo \${pipelineError_details}"|Strandedness|${strandedMeta}|${strandedInfer}|\\n")
-      fi
-      if ${pipelineError_spike}
-      then
-        pipelineError_details=\$(echo \${pipelineError_details}"|Used Spike Ins|${spikeMeta}|${spikeInfer}|\\n")
-      fi
-      if ${pipelineError_species}
-      then
-        pipelineError_details=\$(echo \${pipelineError_details}"|Species|${speciesMeta}|${speciesInfer}|\\n")
-      fi
-      pipelineError_details=\${pipelineError_details::-2}
-      rid=\$(python3 ${script_uploadExecutionRun_failExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${pipelineError_details}" -o ${source} -c \${cookie} -u ${executionRunRID})
-      echo LOG: execution run RID marked as error - \${rid} >> ${repRID}.failExecutionRun.log
-    fi
-    
-    if [ ${params.track} == true ]
-    then
-      dt=`date +%FT%T.%3N%:z`
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        '{ \
-          "ID": "${workflow.sessionId}", \
-          "ExecutionRunRID": "'\${rid}'", \
-          "Failure": "'\${dt}'" \
-        }' \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
-    fi
-  """
-}
-
-/* 
- * uploadQC_fail: uploads the mRNA QC on failed execution run
-*/
-process uploadQC_fail {
-  tag "${repRID}"
-
-  input:
-    path script_deleteEntry_uploadQC_fail
-    path script_uploadQC_fail
-    path credential, stageAs: "credential.json" from deriva_uploadQC_fail
-    val executionRunRID from failExecutionRunRID
-    tuple val (fastqCountError), val (fastqReadError), val (fastqFileError), val (speciesError), val (pipelineError) from error_uploadQC_fail
-
-  when:
-    upload
-    fastqCountError == 'true' || fastqReadError == 'true' || fastqFileError == 'true' || speciesError == 'true' || pipelineError == 'true'
-
-  script:
-    """
-    hostname > ${repRID}.uploadQC.log
-    ulimit -a >> ${repRID}.uploadQC.log
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=${repRID})
-    if [ "\${exist}" != "[]" ]
-    then
-      rids=\$(echo \${exist} | grep -o '\\"RID\\":\\".\\{7\\}' | sed 's/^.\\{7\\}//')
-      for rid in \${rids}
-      do
-        python3 ${script_deleteEntry_uploadQC_fail} -r \${rid} -t mRNA_QC -o ${source} -c \${cookie}
-        echo LOG: old mRNA QC RID deleted - \${rid} >> ${repRID}.uploadQC.log
-      done
-      echo LOG: all old mRNA QC RIDs deleted >> ${repRID}.uploadQC.log
-    fi
-
-    qc_rid=\$(python3 ${script_uploadQC_fail} -r ${repRID} -e ${executionRunRID} -o ${source} -c \${cookie} -u E)
-    echo LOG: mRNA QC RID uploaded - \${qc_rid} >> ${repRID}.uploadQC.log
-
-    echo "\${qc_rid}" > qcRID.csv
-    """
-}
-
-workflow.onError = {
-  subject = "$workflow.manifest.name FAILED: $params.repRID"
-
-  def msg = """\
-
-      Pipeline error summary
-      ---------------------------
-      RID         : ${params.repRID}
-      Version     : ${workflow.manifest.version}
-      Duration    : ${workflow.duration}
-      Nf Version  : ${workflow.nextflow.version}
-      Message     : ${workflow.errorMessage}
-      exit status : ${workflow.exitStatus}
-      """
-      .stripIndent()
-  if (email != '') {
-    sendMail(to: email, subject: subject , body: msg)
-  }
-}
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
new file mode 120000
index 0000000000000000000000000000000000000000..e5aa8d1b5b16a35ab26e961a8e4cc5009d67aef5
--- /dev/null
+++ b/workflow/rna-seq.nf
@@ -0,0 +1 @@
+../rna-seq.nf
\ No newline at end of file
diff --git a/workflow/scripts/bdbag_fetch.sh b/workflow/scripts/bdbag_fetch.sh
index 45ee14a7da409e011494921bafa204b44e96f795..d336829125c030563c96aaf6c354b2f00bdb5a47 100644
--- a/workflow/scripts/bdbag_fetch.sh
+++ b/workflow/scripts/bdbag_fetch.sh
@@ -9,7 +9,7 @@ then
     n=0
     until [ "${n}" -ge "3" ]
     do
-        bdbag --resolve-fetch missing --validate full ${1} --debug && validate=$(tail -n validate.txt | grep -o 'is valid') && break
+        bdbag --resolve-fetch missing --validate full ${1} --debug --config-file bdbag.json && validate=$(tail -n 1 validate.txt | grep -o 'is valid') && break
         n=$((n+1)) 
         sleep 15
     done
@@ -18,8 +18,10 @@ if [ "${validate}" != "is valid" ]
 then
     exit 1
 fi
+count=$(find */ -name "*[_.]R[1-2].fastq.gz" | wc -l)
 for i in $(find */ -name "*[_.]R[1-2].fastq.gz")
 do
     path=${2}.$(echo ${i##*/} | grep -o "R[1,2].fastq.gz")
     cp ${i} ./${path}
-done
\ No newline at end of file
+done
+echo ${count}