diff --git a/.gitignore b/.gitignore
index 12288788210fa386427657fa55ab47b9ac14a6aa..c154fe7a68856b7217057a894e93fdd0ef803328 100644
--- a/.gitignore
+++ b/.gitignore
@@ -281,6 +281,7 @@ $RECYCLE.BIN/
 # nextflow analysis folders/files
 /test_data/*
 !/test_data/createTestData.sh
+!/test_data/Replicate_For_Input_Bag(test).json
 /workflow/.nextflow/*
 /workflow/work/*
 /workflow/output/*
@@ -301,4 +302,4 @@ timeline*.html*
 *_studyRID.csv
 run*.sh
 
-!.gitkeep
+!.gitkeep
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b0131c6ff424bf4002075c28dcf6e6cbb207536c..344138f7b7822f578f1efba09709c1c355f59709 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -8,8 +8,8 @@ before_script:
   - mkdir -p ~/.bdbag
 
 variables:
-  refMoVersion: "38.p6.vM22"
-  refHuVersion: "38.p12.v31"
+  refMoVersion: "38.p6.vM25"
+  refHuVersion: "38.p13.v36"
   refERCCVersion: "92"
 
 stages:
@@ -84,8 +84,8 @@ getData:
   script:
   - singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bdbag --version > version_bdbag.txt
   - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - unzip ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip
-  - singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bash ./workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6 TEST
+  - unzip ./test_data/bag/Q-Y5F6_inputBag_xxxxtest.zip
+  - singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bash ./workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6
   - pytest -m getData
   artifacts:
     name: "$CI_JOB_NAME"
@@ -107,13 +107,14 @@ parseMetadata:
   - rep=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
   - exp=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
   - study=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
-  - endsMeta=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
+  - endsRaw=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
+  - endsMeta="uk"
   - endsManual=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
   - stranded=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
   - spike=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
   - species=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
   - readLength=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p readLength)
-  - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
+  - echo -e "${endsMeta},${endsRaw},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
   - pytest -m parseMetadata
   artifacts:
     name: "$CI_JOB_NAME"
@@ -136,7 +137,7 @@ inferMetadata:
     align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
     if [[ ${align} == "" ]]; then exit 1; fi
   - >
-    singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
+    singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
     ended=`singularity run 'gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/infer_meta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
     if [[ ${ended} == "" ]]; then exit 1; fi
   - pytest -m inferMetadata
@@ -192,11 +193,11 @@ alignData:
   script:
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 --version > version_hisat2.txt
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools --version > version_samtools.txt
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
+  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
+  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
   - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
@@ -245,9 +246,9 @@ countData:
     - merge_requests
     - schedules
   script:
-  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
-  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
-  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
+  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/geneID.tsv
+  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/Entrez.tsv
+  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/sequence/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
   - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se_countData
   - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
   - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
@@ -312,7 +313,7 @@ dataQC:
   - echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
   - >
     for i in {"chr8","chr4","chrY"}; do
-      echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"
+      echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"
     done | singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
   - pytest -m dataQC
 
@@ -358,12 +359,12 @@ uploadExecutionRun:
     cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
     cookie=${cookie:11:-1} &&
     if [ "${exist}" == "[]" ]; then
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BTFM -g 17-BT50 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u F) &&
+      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u F) &&
       echo ${rid} test execution run created
     else
       rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
       rid=${rid:7:-6} &&
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BTFM -g 17-BT50 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u ${rid}) &&
+      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u ${rid}) &&
       echo ${rid} test execution run already exists
     fi
 
@@ -388,7 +389,7 @@ uploadQC:
       done
       echo all old mRNA QC RIDs deleted
     fi
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BTG4 -p "Single Read" -s forward -l 35 -w 5 -f 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
+      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single Read" -s forward -l 35 -w 5 -f 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
       echo ${rid} test mRNA QC created
 
 uploadProcessedFile:
@@ -402,8 +403,8 @@ uploadProcessedFile:
   script:
   - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
   - echo THIS IS A TEST FILE > 17-BTFJ_test.csv
-  - mkdir -p ./deriva/Seq/pipeline/17-BTFE/17-BTG4/
-  - mv 17-BTFJ_test.csv ./deriva/Seq/pipeline/17-BTFE/17-BTG4/17-BTFJ_test.csv
+  - mkdir -p ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/
+  - mv 17-BTFJ_test.csv ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/17-BTFJ_test.csv
   - >
     exist=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=17-BTFJ) &&
     cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
@@ -441,7 +442,7 @@ uploadOutputBag:
       cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
       cookie=${cookie:11:-1} &&
       loc=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/output_bag/TEST/test.txt --parents) &&
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_output_bag.py -e 17-BTG4 -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test output bag' -o staging.gudmap.org -c ${cookie}) &&
+      rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_output_bag.py -e 17-BVDJ -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test output bag' -o staging.gudmap.org -c ${cookie}) &&
       echo ${rid} test output bag created
     else
       rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
@@ -481,7 +482,7 @@ human_BioHPC:
     - schedules
   script:
   - mkdir -p hu
-  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./hu/
+  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./hu/
 
 mouse_BioHPC:
   stage: reference
@@ -493,7 +494,7 @@ mouse_BioHPC:
     - schedules
   script:
   - mkdir -p mo
-  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./mo/
+  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./mo/
 
 human_dev:
   stage: reference
@@ -511,7 +512,7 @@ human_dev:
   - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
   - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
   - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
+  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
   - curl --request GET ${query} > refQuery.json
   - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
   - loc=$(dirname ${refURL})
@@ -537,7 +538,7 @@ mouse_dev:
   - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
   - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
   - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
+  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
   - curl --request GET ${query} > refQuery.json
   - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
   - loc=$(dirname ${refURL})
@@ -563,7 +564,7 @@ human_staging:
   - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
   - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
   - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
+  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
   - curl --request GET ${query} > refQuery.json
   - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
   - loc=$(dirname ${refURL})
@@ -590,7 +591,7 @@ mouse_staging:
   - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
   - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
   - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
+  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
   - curl --request GET ${query} > refQuery.json
   - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
   - loc=$(dirname ${refURL})
@@ -616,7 +617,7 @@ human_prod:
   - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
   - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
   - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
+  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
   - curl --request GET ${query} > refQuery.json
   - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
   - loc=$(dirname ${refURL})
@@ -643,7 +644,7 @@ mouse_prod:
   - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
   - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
   - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
+  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
   - curl --request GET ${query} > refQuery.json
   - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
   - loc=$(dirname ${refURL})
@@ -663,7 +664,7 @@ integration_se:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging -with-dag dag.png --dev false --ci true --email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'
   - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
   artifacts:
     name: "$CI_JOB_NAME"
@@ -687,7 +688,7 @@ integration_pe:
   script:
   - hostname
   - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true
   - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
   artifacts:
     name: "$CI_JOB_NAME"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6b3749cd9b15f1b7ea5f54f41601a827ec2268d7..6c77f50591fc2503b8598505a2a88783eb75fca9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,15 @@
 * Upload mRNA QC
 * Create and upload output bag
 * Add option to not upload
+* Update references to use bags
+* Update to newer references (GRCh38.p13.v36 and GRCm38.p6.vM25)
+* Use production server for data-hub reference call
+* Error pipeline if submitted metadata does not match inferred metadata
+* Update execution run with "Success" or "Error"
+* Error if fastq count is invalid (>2 overall; pe != 2; se != 1)
+* Error if paired-end and the line counts of R1 and R2 differ
+* Error if ambiguous species inference
+* Remove non-fastq files from the inputBag at the export bag config level
 
 **Background**
 * Remove (comment out) option to pull references from S3
@@ -17,6 +26,10 @@
 * Changed order of steps so that fastqc is done after the trim step
 * Change docker images to production
 * Add automated version badges
+* Only calculate/report TIN values on regular chromosomes (from the gtf)
+* Change inputBag fetch to manifest-then-validate (on failure, fetch missing files and revalidate, up to 3 times)
+* Retry getData and trimData processes once on failure
+* Make an inputBag export config that creates an inputBag with only a small txt file for the CI unit test of getData (and update the test)
 
 *Known Bugs*
 * Datahub reference pull uses dev.gudmap.org as source until references are placed on production
diff --git a/README.md b/README.md
index 185f4453163575fc4b76e8ccb00aac53c74b25ab..6f6581c9647eb5bdac8f0fe660ae1bd8308a7855 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,6 @@ Introduction
 ------------
 This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub. It is designed to run on the HPC cluster ([BioHPC](https://portal.biohpc.swmed.edu)) at UT Southwestern Medical Center (in conjunction with the standard nextflow profile: config `biohpc.config`)
 
-![flowchart](docs/RNA-Seq%20Pipeline%20Design%20Flowchart.jpg "Flowchart")
-
 Cloud Compatibility:
 --------------------
 This pipeline is also capable of being run on AWS. To do so:
@@ -37,10 +35,10 @@ To Run:
     * **dev** = [dev.gudmap.org](dev.gudmap.org) (default, does not contain all data)
     * **staging** = [staging.gudmap.org](staging.gudmap.org) (does not contain all data)
    * **production** = [www.gudmap.org](www.gudmap.org) (***does contain all data***)
-  * `--refMoVersion` mouse reference version ***(optional, default = 38.p6.vM22)***
-  * `--refHuVersion` human reference version ***(optional, default = 38.p12.v31)***
+  * `--refMoVersion` mouse reference version ***(optional, default = 38.p6.vM25)***
+  * `--refHuVersion` human reference version ***(optional, default = 38.p13.v36)***
   * `--refERCCVersion` human reference version ***(optional, default = 92)***
-  * `--upload` option to not upload output back to the data-hub ***(optional, default = true)***
+  * `--upload` option to upload outputs back to the data-hub ***(optional, default = false)***
     * **true** = upload outputs to the data-hub
     * **false** = do *NOT* upload outputs to the data-hub
   * `-profile` config profile to use ***(optional)***:
@@ -55,36 +53,46 @@ To Run:
 * NOTES:
   * once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials)
   * reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version)
-    * *current mouse* **38.p6.vM22** = GRCm38.p6 with GENCODE annotation release M22
-    * *current human* **38.p6.v31** = GRCh38.p12 with GENCODE annotation release 31
+    * *current mouse* **38.p6.vM25** = GRCm38.p6 with GENCODE annotation release M25
+    * *current human* **38.p13.v36** = GRCh38.p13 with GENCODE annotation release 36
 * ***Optional*** input overrides
   * `--refSource` source for pulling references
     * **biohpc** = source references from BICF_Core gudmap reference local location (workflow must be run on BioHPC system)
     * **datahub** = source references from GUDMAP/RBK reference_table location (currently uses dev.gudmap.org)
   * `--inputBagForce` utilizes a local replicate inputBag instead of downloading from the data-hub (still requires accurate repRID input)
-    * eg: `--inputBagForce test_data/bag/Replicate_Q-Y5F6.zip` (must be the expected bag structure)
+    * eg: `--inputBagForce test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip` (must be the expected bag structure; this particular example will not work because it is a test bag)
   * `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
    * eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around the fastq's, which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
   * `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses ambiguous species error
     * eg: `--speciesForce 'Mus musculus'`
 * Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
   * `--ci` boolean (default = false)
-  * `--dev` boolean (default = false)
+  * `--dev` boolean (default = true)
 
 FULL EXAMPLE:
 -------------
 ```
-nextflow run workflow/rna-seq.nf --deriva ./data/credential.json --bdbag ./data/cookies.txt --repRID Q-Y5JA
+nextflow run workflow/rna-seq.nf --repRID Q-Y5JA --source production --deriva ./data/credential.json --bdbag ./data/cookies.txt --dev false --upload true -profile biohpc
 ```
 
 To run a set of replicates from study RID:
 ------------------------------------------
 Run in repo root dir:
 * `sh workflow/scripts/splitStudy.sh [studyRID]`
-It will run in parallel in batches of 25 replicatesRID with 30 second delays between launches.\
+It will run in parallel in batches of 5 replicate RIDs with 30-second delays between launches.\
 NOTE: Nextflow "local" processes for all replicates will run on the node/machine the bash script is launched from... consider running the study script on BioHPC's SLURM cluster (use `sbatch`; a sketch follows below).
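+
+For example, a minimal SLURM submission sketch (hypothetical invocation; `[studyRID]` is a placeholder, and the `super` partition is assumed from `biohpc.config`):
+
+```
+sbatch -p super --job-name splitStudy --wrap "sh workflow/scripts/splitStudy.sh [studyRID]"
+```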
 
-
+Errors:
+-------
+The errors reported back to the data-hub are listed below. They aren't thrown on the command line by the pipeline, but rather are submitted (if `--upload true`) to the data-hub for that replicate in the execution run submission:
+
+|Error|Description|
+|:-|:-|
+|**Too many fastqs detected (>2)**|The data-hub standard, and that of this pipeline, is one read-1 fastq plus, if paired-end, one read-2 fastq. As a result, the maximum number of fastqs per replicate is 2.|
+|**Number of fastqs detected does not match submitted endness**|Single-end sequenced replicates can only have one fastq, while paired-end replicates can only have two (see above).|
+|**Number of reads do not match for R1 and R2**|For paired-end sequenced studies the number of reads in the read-1 fastq must match that of read-2. This error usually indicates that corrupted, truncated, or wrong fastq files were uploaded.|
+|**Inference of species returns an ambiguous result**|The species of the replicate is inferred by aligning a random subset of 1 million reads to both the human and mouse reference genomes. If there isn't a clear difference between the alignment rates (a clear call requires `>=40%` for one species and `<40%` for the other), this error is raised (see the sketch after this table).|
+|**Submitted metadata does not match inferred**|All metadata required for analysis is internally inferred by the pipeline; if any of it does not match the submitted metadata, this error is raised to flag a potential mistake.|
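+
+A minimal sketch of the species-inference decision rule above (a hypothetical helper for illustration, not the pipeline's actual code):
+
+```
+# infer_species HUMAN_RATE MOUSE_RATE  (percent of the 1M-read subset aligned to each genome)
+infer_species() {
+  local hu=${1%.*} mo=${2%.*}   # truncate decimals for integer comparison
+  if (( hu >= 40 && mo < 40 )); then echo "Homo sapiens"
+  elif (( mo >= 40 && hu < 40 )); then echo "Mus musculus"
+  else echo "ambiguous"; return 1   # pipeline errors unless --speciesForce is set
+  fi
+}
+```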
 
 <hr>
 [**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/blob/develop/CHANGELOG.md)
diff --git a/docs/RNA-Seq Pipeline Design Flowchart.drawio b/docs/RNA-Seq Pipeline Design Flowchart.drawio
deleted file mode 100644
index 7b65c655592950fdb14c7a8b7c74a4f986816669..0000000000000000000000000000000000000000
--- a/docs/RNA-Seq Pipeline Design Flowchart.drawio	
+++ /dev/null
@@ -1 +0,0 @@
-<mxfile host="Electron" modified="2020-03-23T23:17:50.947Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/12.6.5 Chrome/80.0.3987.86 Electron/8.0.0 Safari/537.36" etag="EoueA3AcDhzhFmxKZ9Lg" version="12.6.5" type="device"><diagram name="Page-1" id="74e2e168-ea6b-b213-b513-2b3c1d86103e">7V1Zd5u6Fv41Xuveh2QxD49Nk7S9pzlNTpq0PS9dMsg2CQYXcBP3118xCDMIAWZ2nJcYIWGs/e1J2ntrxr9fv35wwGZ1Y+vQnHGM/jrjL2ccJzASj/75LbuwheNlOWxZOoYetrH7hnvjD4wamah1a+jQTXX0bNv0jE26UbMtC2peqg04jv2S7rawzfS3bsAS5hruNWDmW78ZurcKW3mGYfY3PkJjuYq+WsE35kB7Xjr21oq+b8bxi+AvvL0G+FlRf3cFdPsl0cRfzfj3jm174af163to+pOLpy0cd11wN35vB1pelQE3Px8/6OrD5hMrf9S/boXLT/faGX6M6+3whEAdzU90aTveyl7aFjCv9q0XwY+G/mMZdLXv89m2N6iRRY1P0PN2EbHB1rNR08pbm9Fd+Gp43xOff/iPOhejq8vX6MnBxS5xcQsdYw096OA2y3N235MX4ZM4EV/vnxVc4YeFP9r/pYWTiSfG3jpa1Ovix7+vd/zPn9zPP3f2Vn76+SIaZ6wU4d0DzhJ6lLkWYqIjboI2+iHODo1zoAk843f6TUCE6mXcLxr6znHALtFhYxuW5yaefOs3oA6vmM0Y9VyImDTiUUHOIKXCGJ4pGaOq7LnAqfGf0upwQUwORx/CecBXiQndNwX4r8ELotyYF5626w3uj/jd2Zz4gxW5PH8sdVa5+3PPvucerj5yc9eRTDgMf3CyjIDni/vorx6rlAxH2oIOe0lSKbBvOLwXruHUkwapzCHPl1/+dVcf/r57v/g4N73ftzdX5pkg5BmEyErKEAyiCLX1B2GIVCL/OVak8VHD4b0wAiZjS+rDsi34pnjj01f54euvp9fvPyF8+iOrMnv11xmLXYIEb9z9fOJvP/z1+Pjp7yX439L2ft1aZyIzG4A5WJ5TzxVpjzu2nqFFHy5KJajnGX944zH9sMfJ0+jC0xgTMzB5aFVyNbJwLBP2JAgfMqYX2GPv/wT7NmFPVBb8YKjvwcEuM3HGZSHJMpQ0VV6wqqQDReHPomn/DcxtRIgZJ5mImhcLO3jPPYNIv7Y2vnHmBhB/hzqwzOZ1fxN9Wvr/b4BhoZvvEHfsPENz8VPRC4YPDrvhZncDrOIve4nW+fyvE/wJDaHMaLZpO2Gzs5yD/wR3ODQJDPHTf+OBwVMXYG2Yu3D42rZs9A4I8Kku+19J/JEzTswuwYpo3v3WYP0wvsJ0EANKoJZL/7P/YqI/4SKib1lfNu6LufWgx3D7x4Q0j+8gwbbdBDfmYL3vFZIk7uV6jv0crcj6jX6nSMT5l2xw6a7AJhyzfl36k3O+MO0XbQUc71y3te0avX448GVlePA+nHjU/QV1Dp/gT7zfEsg0v2VhmOb7gN74XfiFokFNC/sHr5W5P1dEwVe5YkAp/Kuh48HXSvMa34qndC+9RJ+D9z2TRGB2iRtM8sZLPG3RzcS9VYjx+KaUuIdkXty+TLxClubBZUz4ZGMajlG/HG5jFg3ZEYM8oyfdF2NtgsALSqg2bWWY+mews7e+KnA9oD3jK0QdRPlIKQb8lVak8Qq8f2GCOTQv4jX8kKSx25WEX/i9Pp9eR5x8+QgdHVggao6+UUGXwDSWFvqsIQL7WjQBJ/838boIFV2YpYEU3FG4OS9J8TwQNGWEqH1TXtXFwj5n7LCRXxBhA7dFjavEBgf2IIgqMqEg6sl/Lif/kS6/AMuKVM9Rto15LVRS5RONF+iIk0qYU6GFOSVaRxzTfP0tuewAHK2ZyYlNydBQlEtszgL7ssy8DK6yxmqO9KGhU430XN4aJS7WiRVdMJbliTZw7+vbTEoMIDOQbiZmuqtMBqHNzELalJaLhj1qfcAlVHqk0FN4BHPXNrcefOdoEWaD1v2VkBPzXFdyBQu/ygKcT5OBFQhyhiS748YmcoZIJAznYfzYve/6I3GnTKbsxciPlBQhy5S8FGxRbJDWMWnM0J7UaEb1rtw2B25MQwOe7wf98+my0GvLQC5lEWRZN/QGkC8ArC0wP1mbwCYkiYm0IHEjs0JKywe2H96uytptWGV0TdUlkcPHzR18aw6WhleV6nlSJ3GAqU7xAMkoiOgerWb1QGqOZIN3Revc2+epjCTRJfDARE3weA88M8ltmOTRt/0DNQ9YS39dosbX8YRvk/nMtwETGa4W4o0LXxa5OcrWs6sKIi+kQXX2YI7AQZoc2zdlmlyqqMh5tm1NfgT2fwFOmwdenXB6ME65I8ApL/fgp0qVFehkHVVsI0zYUcV4bt2aXQDX+9Wxybremp5xhg3XAux0Qsfs1niejorSjaFK1Z6tUxG9HtADnn0ThCS5HF0RMvfyQo6G6Le68CYmwZT9Dj6rsQbfCmDFQUNxe7GjDjKZcGBmmclUNbyKk4/BZBLaNZmI0U/88XugRCh0B0jyNA+TOtHMtVR6MNnlmhposoY7VrXTMNypUpVg8m3Nww0+/AzTSJqAV8GaW2wC7m+Su997DrD0OgM2xjOs1R9qBoxfab9QTn0GavSnpsB0RUjwcriK4luSIIyaEsgmAd+HlqEB810U4bI2dD0Q1za64xu+qG2F2qA1i4Jrbm3X8Aw7FQ6Dn/I50yF+Gg6gMeEiitfTDGuJGs4CdkqE27BcP6yBIytLWUNpgTMo4r2QM4pw1ZLD5O+mZdiFFFk5LYBmcVWAkSrQan33pw0HIfeq+SALzzHW09/+EZTRuWESd/RGb3vL2WJFU1jJm8LUZMpJ+2Ztb7vczXcPmug+Wupy/rz6fPttd2Wcyc3zFccO0wOshQZ4JM/yMYQB8lIPvppSXUNN1k3DWngabhpVwLa+Mu9Tew31vAs0pY2X+gQebr2eKq8G8TV+acFY4B2Xs9GDYCBt2PXnXBB3xv+Bizx3Tsm1kITxuRbHv8PTnmuB9x5KE4wJsdn0TOShjTmunjGX6d5LTFd+/75AJEzWlsNCr8aO8YC2XAGgp5PVQa1O0EOmB0GakO1jpqo0aT1AtJldz3Rk1ztwAdG8BinvS2ih1y227tox5v2tvTlw4eFmfG3eFtO8TWBtkqXQhhVP11StU3PlJ9RfLyzklF8vPSTOr+d7n23kzll9iZ0pxzdkMFWeoMFW2fRX8OOCuuMxs+XTCn51xYiD98sUo5rXi8QqPtI4bOzuFvBzg7MbhWoG2OGURYMy2G7DVlfaLXw7jRIC3SKbGOGijsR9HP9egFpD2U3WgcQafRqbAVREn1aLe10tb
oqcXleLaUq+dfckKCl2PQfGeFySukFFMp2Sw+0XXRiG96rc3jnr/ylPf4wvW12QCHX9gvpu03dL1H7zyit8HTGvXBwgr1wWc6Q9fnuR6DDlgUdjktROBKkfnzckqVWxJ21ITsxHUt4g5ruHN7kiDcMdAb7bdpRoU1pJ/U7FUaLaGRN2lmJYn7ylLr2lTuAzvMfUWSm26blMxRQeo9tElgV5UbAGz/DCWH4zploTF99lu8iML/acFDYj6XmlIq9ynfpO4O9b8GDdws3u+9f5w43woG7WcebvG9tYyuOMyhVJS5LYkVDciNhPOQI7svVkEMqEVhNHUzEk6ZJ3wpZkPlWiJVMgoveIynJmcBAdxUDAQXQIQ3846LWaJ/EnEIokaYg1j6GoJzdkWif5nMvmi54TVt3U+iZUcJbqcpZw/CW5o3hS3tQZrCQHJq28+WNYBsqn4rejvQOaz7gLb4McekZzf78NRd4YE70qcuLJ13k9vt2YNtCPQJELQyZRFR8zXnWypyItqbCahrAkWl2D5ruN+fTMItYsN7dYwjZccb3jvs0tVkxjkGdTBlRpf1bp4VDXeJ3qBMsDYEkt+FuKyoLwqG5RybP1UJnt3w8qB03mO05UchVRqYwClWo9VAoCvT8+xvjQ/v2gvnlgzgn15BIvpahXh0C9lN2aL5HF2f5lKM7uJ9bt3wvqsRt1Qv0hqK9+zvx4YC+r9WCf7d8PLE+GcQNY0jIlUuUECiMB+walWtNby/bvB5Qnu7htUJJK5hTHFfS+hsDWdNdyA/rB5aDBQG8bl+wgJyY0B6bAlAzg1IYDeoE+P+iJf7WgPzDMqQXhx7kskV0G4EsgOMiygdBuzYMosv0Ny2PqXncSqMWbcr2LY6HmUkJuQD9QbTf1zF0BZ3PCalFQ5UihymWFahlUcwOkMmxnfbm6A9pmBgNou49fXu7ZL+wXV/vr31fWuCWESsQFo1MsUhA5I1LPCKoaf0N+yq2xgaZh+akJl9D1c5c45hpHy1BP9ilIw5qXJVwlYw+iZKncSTu5LCoHvdsfMA8e5fNUBD30XPFiJl76z0KM7+4DGPI8VjuwgMtEgRAyNOJqu8nAguyp6a2V+FKKq5i7G2BVAQE5COvufYKA4aOKoq6IcTzayjD1z2Bnb/0pcD2gPeOrfIxPWsgjwa4HyXH+RXAC0wUavQw6ZTLw0kEobPTjrsHaMH0KPUJHBxaYpU5QUEgoayN2pX5QkcKq54rExH9sWg4R4rlUYoxRGwFdRJUtEcpi2C+WC9YbEw4R1VWWUVfMI3kqjCYk+3TQTrpMH9UuouK01OOUCEFD5I5TPIpypJUoqJSoLlrGHMNYIpcooC3UDuMLYpS6Mjb2hNdRn+w5KXTrY+gk7oNIO1wWNzn2JEfXwEC6Pwo1jx/TRR2sZplYJ63fTOtXVfqEVWYy155UfqcqP68+ysTMsan8yWn8QoX/1mq1HELO4bQ8eVM5R0zDWkDnpugQ+enpeL7fii1qlYotMuH7ui3YQjY2Bg0v7sXaqGBG0PiiNCij6tIBN0hqR8MifwzFjjgAg7QJJAmgY9P+xUCbkPbn6FXRD1X++cqAAQyuAslHqd2X6X/vOcDSa43YGM+w3gCoGZD+VrOxVhQMdk5ubdfwDJu4o/Y50yF+Gt4dMeEiWtvRDGuJGs4CJiKfRt05Q1ROb1da4Aei9ZG3hoOlKq1vy6mimVRnK0rook7NcVhOx284HVovh8YkpWkqhDAVoow4tn2YA2BJm79yYTRmK6oqmiZS+qEYwB2soWByT2QBpZi8Y1xAIUbWCVyOlJ5hHYH27+SEcIr2Vypof7F37U+cTK6F7JbpHZhAtAhywAyDlojApPJPaQgrpn15+gu2I0+VccuEF57TEuE1FWuBLqCnYS7QIZ0lFONqtgPdATR7B/OORwyg2slJ80Lenj4Kx76TU4mPRbWfzv86QLXT+ae8PIVIyG0t6MmdlHtVKEsnKI8ayvwRQJmXe4ByPKl0TTwVO7XE2piGoUoXzxlCMU+uTVgUmUpt5o4I1mtxZroMSh6O4jsBfqrXoD5FR1NOOuus17DqFpTykSY3U52b8iSevNYtjjfpW8Xy+HTEignO2f69pOKLzUtJvaFU/Mpope3/VLARBzEJJb4eXrP9ceRAYf9Mdn3d/v3wQ7uL/Cd+mFVY4BwrR8hqPY7I9u8Hse2uXfWN2EPRqQN3FXt9nRsaHKGS6piQKsgZpHJ0pGb7H4Xsltpd+jrJbtpcV02lG6a4kFhPcGf791MycDp13lsR0iTeqoDTqrmdJBE9nmLXfqTrucDvK7zwteRpyXCurLIVw6nnAqfGf0qbw3mhj5pxzQ9OegtmThNuGVFd2YHZRWQasQt9eC/sgu2vN8Yuh3EIEfjqyE1+lpVoNcPEkkM+SoZLJcM5IRjeeEw/zIBpfmKGA5mBxWvTo+UGnlpBr5Qb6MOPixu4EzdU54b57kET3UdLXc6fV59vv+2ujDOZkAA+JmZQBN/6qcUAhCGVAFxsZDUc3g8rTHvFaECngqglxr5KKslqji9KFkoJQ0qBLfE056Ph8IZ8gS4d288e23f3QzdubB36Pf4P</diagram></mxfile>
\ No newline at end of file
diff --git a/docs/RNA-Seq Pipeline Design Flowchart.jpg b/docs/RNA-Seq Pipeline Design Flowchart.jpg
deleted file mode 100644
index a56462a2991aecb22d5bbff493c18e3f673f2b0f..0000000000000000000000000000000000000000
Binary files a/docs/RNA-Seq Pipeline Design Flowchart.jpg and /dev/null differ
diff --git a/docs/RNA-Seq Pipeline Design Process Table.docx b/docs/RNA-Seq Pipeline Design Process Table.docx
deleted file mode 100644
index 21604d8f30662ffd93f8d0605b671a0921864b0c..0000000000000000000000000000000000000000
Binary files a/docs/RNA-Seq Pipeline Design Process Table.docx and /dev/null differ
diff --git a/docs/RNA-Seq Pipeline Design Process Table.pdf b/docs/RNA-Seq Pipeline Design Process Table.pdf
deleted file mode 100644
index 97f1d5ddfb0ae0848aa0bf8b37681db9efeb3c6b..0000000000000000000000000000000000000000
Binary files a/docs/RNA-Seq Pipeline Design Process Table.pdf and /dev/null differ
diff --git a/docs/dag.png b/docs/dag.png
old mode 100644
new mode 100755
index 58456bbcad81eb5752fb85a764dfb6792cea9aaa..5a3a8e635b8195e9ff15a09748f41627001157e7
Binary files a/docs/dag.png and b/docs/dag.png differ
diff --git a/docs/software_references_mqc.yaml b/docs/software_references_mqc.yaml
old mode 100644
new mode 100755
diff --git a/docs/software_versions_mqc.yaml b/docs/software_versions_mqc.yaml
old mode 100644
new mode 100755
index ea5487adc7ac4894ff48bae93783a5348a945160..4f0f08bfb5d317421a1ea9f7dd26a844cdd02cb7
--- a/docs/software_versions_mqc.yaml
+++ b/docs/software_versions_mqc.yaml
@@ -20,5 +20,5 @@
             <dt>deepTools</dt><dd>v3.5.0</dd>
             <dt>FastQC</dt><dd>v0.11.9</dd>
             <dt>MultiQC</dt><dd>v1.9</dd>
-            <dt>Pipeline Version</dt><dd>v0.0.4_indev</dd>
+            <dt>Pipeline Version</dt><dd>v0.1.0</dd>
             </dl>
diff --git a/test_data/Replicate_For_Input_Bag(test).json b/test_data/Replicate_For_Input_Bag(test).json
new file mode 100644
index 0000000000000000000000000000000000000000..46fefe878c7c370792b403c4fb89d3ac79fd5c69
--- /dev/null
+++ b/test_data/Replicate_For_Input_Bag(test).json
@@ -0,0 +1,97 @@
+{
+  "bag": {
+    "bag_name": "{rid}_inputBag",
+    "bag_algorithms": [
+      "md5"
+    ],
+    "bag_archiver": "zip"
+  },
+  "catalog": {
+    "query_processors": [
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Study",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Study_RID)=(RNASeq:Study:RID)/Study_RID:=RID,Internal_ID,Title,Summary,Overall_Design,GEO_Series_Accession_ID,GEO_Platform_Accession_ID,Funding,Pubmed_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Sequencing_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment Antibodies",
+          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Antibodies:Experiment_RID)?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment Custom Metadata",
+          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Custom_Metadata:Experiment_RID)?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Experiment Settings",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Has_Strand_Specific_Information,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Replicate",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/RID,Study_RID,Experiment_RID,Biological_Replicate_Number,Technical_Replicate_Number,Specimen_RID,Collection_Date,Mapped_Reads,GEO_Sample_Accession_ID,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Specimen",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/S:=(Specimen_RID)=(Gene_Expression:Specimen:RID)/T:=left(Stage_ID)=(Vocabulary:Developmental_Stage:ID)/$S/RID,Title,Species,Stage_ID,Stage_Name:=T:Name,Stage_Detail,Assay_Type,Strain,Wild_Type,Sex,Passage,Phenotype,Cell_Line,Parent_Specimen,Upload_Notes,Preparation,Fixation,Embedding,Internal_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT,GUDMAP2_Accession_ID?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Specimen_Anatomical_Source",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Tissue:Specimen_RID)/RID,Specimen_RID,Tissue,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Specimen_Cell_Types",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Cell_Type:Specimen)/RID,Specimen_RID:=Specimen,Cell_Type,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "Single Cell Metrics",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:Single_Cell_Metrics:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Reads_%28Millions%29,Reads%2FCell,Detected_Gene_Count,Genes%2FCell,UMI%2FCell,Estimated_Cell_Count,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+        }
+      },
+      {
+        "processor": "csv",
+        "processor_params": {
+          "output_path": "File",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Caption,File_Type,File_Name,URI,File_size,MD5,GEO_Archival_URL,dbGaP_Accession_ID,Processed,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT,Legacy_File_RID,GUDMAP_NGF_OID,GUDMAP_NGS_OID?limit=none"
+        }
+      },
+      {
+        "processor": "fetch",
+        "processor_params": {
+          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}",
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=txt/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
+        }
+      }
+    ]
+  }
+}
diff --git a/test_data/createTestData.sh b/test_data/createTestData.sh
index 8f5cebc6180a739189693c62451a3eb0f1970245..35fa2a4f467627a09bedd6f2675df04971c341f1 100644
--- a/test_data/createTestData.sh
+++ b/test_data/createTestData.sh
@@ -12,12 +12,14 @@ mkdir -p NEW_test_data
 ln -sfn ./test_data/auth/credential.json ~/.deriva/credential.json
 
 mkdir -p ./NEW_test_data/bag
+singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-download-cli staging.gudmap.org --catalog 2 './Replicate_For_Input_Bag(test).json' . rid=Q-Y5F6
+cp Q-Y5F6_inputBag.zip ./NEW_test_data/bag/Q-Y5F6_inputBag_xxxxtest.zip
 singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-download-cli staging.gudmap.org --catalog 2 ../workflow/conf/Replicate_For_Input_Bag.json . rid=Q-Y5F6
 cp Q-Y5F6_inputBag.zip ./NEW_test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip
 
 mkdir -p ./NEW_test_data/fastq
-unzip ./NEW_test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip
-singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bash ../workflow/scripts/bdbagFetch.sh Q-Y5F6_inputBag Q-Y5F6
+unzip ./Q-Y5F6_inputBag.zip
+singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bash ../workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6
 cp Q-Y5F6.R1.fastq.gz ./NEW_test_data/fastq/Q-Y5F6.R1.fastq.gz
 cp Q-Y5F6.R2.fastq.gz ./NEW_test_data/fastq/Q-Y5F6.R2.fastq.gz
 
@@ -45,11 +47,11 @@ cp metaTest.csv ./NEW_test_data/meta/metaTest.csv
 
 mkdir -p ./NEW_test_data/bam
 mkdir -p ./NEW_test_data/bam/small
-singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/hisat2/genome --rna-strandness F -U ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
+singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/hisat2/genome --rna-strandness F -U ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
 singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
 singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
 singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
-singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
+singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
 singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
 singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
 singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
@@ -82,9 +84,9 @@ cp Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.
 
 mkdir -p ./NEW_test_data/counts
 mkdir -p ./NEW_test_data/counts/small
-ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/geneID.tsv
-ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/Entrez.tsv
-singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam 
+ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/metadata/geneID.tsv
+ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/metadata/Entrez.tsv
+singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/annotation/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/sequence/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam 
 singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ../workflow/scripts/calculateTPM.R --count Q-Y5F6_1M.se_countData
 singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ../workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
 cp Q-Y5F6_1M.se_countData ./NEW_test_data/counts/small/Q-Y5F6_1M.se_countData
@@ -104,7 +106,7 @@ cp Q-Y5F6_1M.R1_fastqc.zip ./NEW_test_data/fastqc/small/Q-Y5F6_1M.R1_fastqc.zip
 
 echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
 for i in {"chr8","chr4","chrY"}; do
-echo "tin.py -i ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
+echo "tin.py -i ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/annotation/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
 cp Q-Y5F6_1M.se.sorted.deduped.tin.xls ./NEW_test_data/meta/Q-Y5F6_1M.se.sorted.deduped.tin.xls
 
 chgrp -R BICF_Core ./NEW_test_data
diff --git a/workflow/conf/Replicate_For_Input_Bag.json b/workflow/conf/Replicate_For_Input_Bag.json
index 4380e46734a4425f7df57ad0cf0553a868b03c9d..278d0bf4d9d9f5074d7e3c4ef948287eb97ed767 100644
--- a/workflow/conf/Replicate_For_Input_Bag.json
+++ b/workflow/conf/Replicate_For_Input_Bag.json
@@ -89,7 +89,7 @@
         "processor": "fetch",
         "processor_params": {
           "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=FastQ/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
         }
       }
     ]
diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config
index 3e3cbb65a60f726cb936d0dacaefe3a1a07662e4..cdd91da7bb7ede67aa9a004ab4d617186edee334 100644
--- a/workflow/conf/aws.config
+++ b/workflow/conf/aws.config
@@ -16,91 +16,107 @@ process {
   cpus = 1
   memory = '1 GB'
 
-  withName: trackStart {
+  withName:trackStart {
     cpus = 1
     memory = '1 GB'
   }
-  withName: getBag {
+  withName:getBag {
     cpus = 1
     memory = '1 GB'
   }
-  withName: getData {
+  withName:getData {
     cpus = 1
     memory = '1 GB'
   }
-  withName: parseMetadata {
+  withName:parseMetadata {
     cpus = 15
     memory = '1 GB'
   }
-  withName: trimData {
+  withName:trimData {
     cpus = 20
     memory = '2 GB'
   }
-  withName: getRefInfer {
+  withName:getRefInfer {
     cpus = 1
     memory = '1 GB'
   }
-  withName: downsampleData {
+  withName:downsampleData {
     cpus = 1
     memory = '1 GB'
   }
-  withName: alignSampleData {
+  withName:alignSampleData {
     cpus = 50
     memory = '5 GB'
   }
-  withName: inferMetadata {
+  withName:inferMetadata {
     cpus = 5
     memory = '1 GB'
   }
-  withName: getRef {
+  withName:checkMetadata {
     cpus = 1
     memory = '1 GB'
   }
-  withName: alignData {
+  withName:getRef {
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:alignData {
     cpus = 50
     memory = '10 GB'
   }
-  withName: dedupData {
+  withName:dedupData {
     cpus = 5
     memory = '20 GB'
   }
-  withName: countData {
+  withName:countData {
     cpus = 2
     memory = '5 GB'
   }
-  withName: makeBigWig {
+  withName:makeBigWig {
     cpus = 15
     memory = '5 GB'
   }
-  withName: fastqc {
+  withName:fastqc {
     cpus = 1
     memory = '1 GB'
   }
-  withName: dataQC {
+  withName:dataQC {
     cpus = 15
     memory = '2 GB'
   }
-  withName: aggrQC {
+  withName:aggrQC {
     cpus = 2
     memory = '1 GB'
   }
-  withName: uploadInputBag {
+  withName:uploadInputBag {
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadExecutionRun {
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadQC {
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:uploadProcessedFile {
     cpus = 1
     memory = '1 GB'
   }
-  withName: uploadExecutionRun {
+  withName:uploadOutputBag {
     cpus = 1
     memory = '1 GB'
   }
-  withName: uploadQC {
+  withName:finalizeExecutionRun {
     cpus = 1
     memory = '1 GB'
   }
-  withName: uploadProcessedFile {
+  withName:failPreExecutionRun {
     cpus = 1
     memory = '1 GB'
   }
-  withName: uploadOutputBag {
+  withName:failExecutionRun {
     cpus = 1
     memory = '1 GB'
   }
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index ca9f0e4f935099a50f6ef241dfe42f37e6e382b9..8cfc34ad4e8deea2735e0e49f613dfa48bb6defc 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -6,71 +6,89 @@ process {
   executor = 'slurm'
   queue = 'super'
   clusterOptions = '--hold'
+  time = '4h'
+  errorStrategy = 'retry'
+  maxRetries = 1
 
-  withName: trackStart {
+  withName:trackStart {
     executor = 'local'
   }
-  withName: getBag {
+  withName:getBag {
     executor = 'local'
   }
-  withName: getData {
+  withName:getData {
     queue = 'super'
   }
-  withName: parseMetadata {
+  withName:parseMetadata {
     executor = 'local'
   }
-  withName: trimData {
+  withName:trimData {
     queue = 'super'
   }
-  withName: getRefInfer {
+  withName:getRefInfer {
     queue = 'super'
   }
-  withName: downsampleData {
+  withName:downsampleData {
     executor = 'local'
   }
-  withName: alignSampleData {
+  withName:alignSampleData {
     queue = 'super'
   }
-  withName: inferMetadata {
+  withName:inferMetadata {
     queue = 'super'
   }
-  withName: getRef {
+  withName:checkMetadata {
+    executor = 'local'
+  }
+  withName:getRef {
     queue = 'super'
   }
-  withName: alignData {
+  withName:alignData {
     queue = '256GB,256GBv1'
   }
-  withName: dedupData {
+  withName:dedupData {
     queue = 'super'
   }
-  withName: countData {
+  withName:countData {
     queue = 'super'
   }
-  withName: makeBigWig {
+  withName:makeBigWig {
     queue = 'super'
   }
-  withName: fastqc {
+  withName:fastqc {
     queue = 'super'
   }
-  withName: dataQC {
+  withName:dataQC {
     queue = 'super'
   }
-  withName: aggrQC {
+  withName:aggrQC {
+    executor = 'local'
+  }
+  withName:uploadInputBag {
+    executor = 'local'
+  }
+  withName:uploadExecutionRun {
+    executor = 'local'
+  }
+  withName:uploadQC {
+    executor = 'local'
+  }
+  withName:uploadProcessedFile {
     executor = 'local'
   }
-  withName: uploadInputBag {
+  withName:uploadOutputBag {
     executor = 'local'
   }
-  withName: uploadExecutionRun {
+  withName:finalizeExecutionRun {
     executor = 'local'
   }
-  withName: uploadQC {
+  withName:failPreExecutionRun {
     executor = 'local'
   }
-  withName: uploadProcessedFile {
-    executor = 'local'
-  }
-  withName: uploadOutputBag {
+  withName:failExecutionRun {
     executor = 'local'
   }
 }
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index 33b16b42074e47dcce027c2ac391304b95c91ff3..8828d1ad62ec30d929643cda8e0aa5e89812c042 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -25,46 +25,49 @@ process {
   withName:getData {
     container = 'gudmaprbk/deriva1.3:1.0.0'
   }
-  withName: parseMetadata {
+  withName:parseMetadata {
     container = 'gudmaprbk/python3:1.0.0'
   }
-  withName: trimData {
+  withName:trimData {
     container = 'gudmaprbk/trimgalore0.6.5:1.0.0'
   }
-  withName: getRefInfer {
+  withName:getRefInfer {
     container = 'gudmaprbk/deriva1.3:1.0.0'
   }
-  withName: downsampleData {
+  withName:downsampleData {
     container = 'gudmaprbk/seqtk1.3:1.0.0'
   }
-  withName: alignSampleData {
+  withName:alignSampleData {
     container = 'gudmaprbk/hisat2.2.1:1.0.0'
   }
-  withName: inferMetadata {
+  withName:inferMetadata {
     container = 'gudmaprbk/rseqc4.0.0:1.0.0'
   }
-  withName: getRef {
+  withName:checkMetadata {
+    container = 'gudmaprbk/gudmap-rbk_base:1.0.0'
+  }
+  withName:getRef {
     container = 'gudmaprbk/deriva1.3:1.0.0'
   }
-  withName: alignData {
+  withName:alignData {
     container = 'gudmaprbk/hisat2.2.1:1.0.0'
   }
-  withName: dedupData {
+  withName:dedupData {
     container = 'gudmaprbk/picard2.23.9:1.0.0'
   }
-  withName: countData {
+  withName:countData {
     container = 'gudmaprbk/subread2.0.1:1.0.0'
   }
-  withName: makeBigWig {
+  withName:makeBigWig {
     container = 'gudmaprbk/deeptools3.5.0:1.0.0'
   }
-  withName: fastqc {
+  withName:fastqc {
     container = 'gudmaprbk/fastqc0.11.9:1.0.0'
   }
-  withName: dataQC {
+  withName:dataQC {
     container = 'gudmaprbk/rseqc4.0.0:1.0.0'
   }
-  withName: aggrQC {
+  withName:aggrQC {
     container = 'gudmaprbk/multiqc1.9:1.0.0'
   }
   withName:uploadInputBag {
@@ -82,11 +85,20 @@ process {
   withName:uploadOutputBag {
     container = 'gudmaprbk/deriva1.3:1.0.0'
   }
+  withName:finalizeExecutionRun {
+    container = 'gudmaprbk/deriva1.3:1.0.0'
+  }
+  withName:failPreExecutionRun {
+    container = 'gudmaprbk/deriva1.3:1.0.0'
+  }
+  withName:failExecutionRun {
+    container = 'gudmaprbk/deriva1.3:1.0.0'
+  }
 }
 
 trace {
   enabled = false
-  file = 'pipeline_trace.txt'
+  file = 'trace.txt'
   fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
 }
 
@@ -110,6 +122,6 @@ manifest {
   homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
   description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
   mainScript = 'rna-seq.nf'
-  version = 'v0.0.4_indev'
+  version = 'v0.1.0'
   nextflowVersion = '>=19.09.0'
 }
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 3a97735fe88059a99de264e03d798dc5e2410032..8c247c2508b426992237fa5cfe5fb496e0f99579 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -14,8 +14,8 @@ params.bdbag = "${baseDir}/../test_data/auth/cookies.txt"
 //params.repRID = "16-1ZX4"
 params.repRID = "Q-Y5F6"
 params.source = "dev"
-params.refMoVersion = "38.p6.vM22"
-params.refHuVersion = "38.p12.v31"
+params.refMoVersion = "38.p6.vM25"
+params.refHuVersion = "38.p13.v36"
 params.refERCCVersion = "92"
 params.outDir = "${baseDir}/../output"
 params.upload = false
@@ -46,6 +46,9 @@ deriva.into {
   deriva_uploadQC
   deriva_uploadProcessedFile
   deriva_uploadOutputBag
+  deriva_finalizeExecutionRun
+  deriva_failPreExecutionRun
+  deriva_failExecutionRun
 }
 bdbag = Channel
   .fromPath(params.bdbag)
@@ -62,7 +65,7 @@ fastqsForce = params.fastqsForce
 speciesForce = params.speciesForce
 email = params.email
 
-// Define fixed files and
+// Define fixed files and variables
 replicateExportConfig = Channel.fromPath("${baseDir}/conf/Replicate_For_Input_Bag.json")
 executionRunExportConfig = Channel.fromPath("${baseDir}/conf/Execution_Run_For_Output_Bag.json")
 if (params.source == "dev") {
@@ -73,11 +76,9 @@ if (params.source == "dev") {
   source = "www.gudmap.org"
 }
 if (params.refSource == "biohpc") {
-  referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references"
-//} else if (params.refSource == "aws") {
-//  referenceBase = "s3://bicf-references"
+  referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references/new"
 } else if (params.refSource == "datahub") {
-  referenceBase = "dev.gudmap.org"
+  referenceBase = "www.gudmap.org"
 }
 referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"])
 multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml")
@@ -95,7 +96,10 @@ script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R")
 script_convertGeneSymbols = Channel.fromPath("${baseDir}/scripts/convertGeneSymbols.R")
 script_tinHist = Channel.fromPath("${baseDir}/scripts/tin_hist.py")
 script_uploadInputBag = Channel.fromPath("${baseDir}/scripts/upload_input_bag.py")
-script_uploadExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
+script_uploadExecutionRun_uploadExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
+script_uploadExecutionRun_finalizeExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
+script_uploadExecutionRun_failPreExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
+script_uploadExecutionRun_failExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
 script_uploadQC = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
 script_uploadOutputBag = Channel.fromPath("${baseDir}/scripts/upload_output_bag.py")
 script_deleteEntry_uploadQC = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
@@ -105,7 +109,7 @@ script_deleteEntry_uploadProcessedFile = Channel.fromPath("${baseDir}/scripts/de
  * trackStart: track start of pipeline
  */
 process trackStart {
-  container 'docker://bicf/bicfbase:2.1.0'
+  container 'docker://gudmaprbk/gudmap-rbk_base:1.0.0'
   script:
   """
   hostname
@@ -139,6 +143,7 @@ Human Reference Version: ${params.refHuVersion}
 ERCC Reference Version : ${params.refERCCVersion}
 Reference source       : ${params.refSource}
 Output Directory       : ${params.outDir}
+Upload                 : ${upload}
 ------------------------------------
 Nextflow Version       : ${workflow.nextflow.version}
 Pipeline Version       : ${workflow.manifest.version}
@@ -150,11 +155,11 @@ Development            : ${params.dev}
 """
 
 /*
- * splitData: split bdbag files by replicate so fetch can occure in parallel, and rename files to replicate rid
+ * getBag: download input bag
  */
 process getBag {
   tag "${repRID}"
-  publishDir "${outDir}/inputBag", mode: 'copy', pattern: "Replicate_*.zip"
+  publishDir "${outDir}/inputBag", mode: 'copy', pattern: "*_inputBag_*.zip"
 
   input:
     path credential, stageAs: "credential.json" from deriva_getBag
@@ -205,7 +210,7 @@ inputBag.into {
 }
 
 /*
- * getData: fetch study files from consortium with downloaded bdbag.zip
+ * getData: fetch replicate files from consortium with downloaded bdbag.zip
  */
 process getData {
   tag "${repRID}"
@@ -220,6 +225,7 @@ process getData {
     path ("**/File.csv") into fileMeta
     path ("**/Experiment Settings.csv") into experimentSettingsMeta
     path ("**/Experiment.csv") into experimentMeta
+    path "fastqCount.csv" into fastqCount_fl
 
   script:
     """
@@ -245,19 +251,30 @@ process getData {
     echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
     sh ${script_bdbagFetch} \${replicate::-13} ${repRID}
     echo -e "LOG: fetched" >> ${repRID}.getData.log
+    
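+    # count the downloaded fastq files so endedness can be checked downstream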
+    fastqCount=\$(ls *.fastq.gz | wc -l)
+    echo "\${fastqCount}" > fastqCount.csv
     """
 }
 
+// Split fastq count into channel
+fastqCount = Channel.create()
+fastqCount_fl.splitCsv(sep: ",", header: false).separate(
+  fastqCount
+)
+
 // Set raw fastq to downloaded or forced input and replicate them for multiple process inputs
 if (fastqsForce != "") {
   Channel
     .fromPath(fastqsForce)
     .ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" }
-    .collect().set {
+    .collect().into {
+      fastqs_parseMetadata
       fastqs_trimData
     }
 } else {
-  fastqs.set {
+  fastqs.into {
+    fastqs_parseMetadata
     fastqs_trimData
   }
 }
@@ -273,9 +290,12 @@ process parseMetadata {
     path file from fileMeta
     path experimentSettings, stageAs: "ExperimentSettings.csv" from experimentSettingsMeta
     path experiment from experimentMeta
+    path (fastq) from fastqs_parseMetadata
+    val fastqCount
 
   output:
     path "design.csv" into metadata_fl
+    path "fastqError.csv" into fastqError_fl
 
   script:
     """
@@ -295,8 +315,21 @@ process parseMetadata {
     echo -e "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log
 
     # get endedness metadata
-    endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta)
-    echo -e "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log
+    endsRaw=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta)
+    echo -e "LOG: endedness metadata parsed: \${endsRaw}" >> ${repRID}.parseMetadata.log
+    if [ "\${endsRaw}" == "Single Read" ]
+    then
+      endsMeta="se"
+    elif [ "\${endsRaw}" == "Paired End" ]
+    then
+      endsMeta="pe"
+    else
+      endsMeta="unknown"
+    fi
+    if [ "\${endsRaw}" == "" ]
+    then
+      endsRaw="_No value_"
+    fi
 
     # manually get endedness
     endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p endsManual)
@@ -316,19 +349,54 @@ process parseMetadata {
 
     # get read length metadata
     readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p readLength)
-    if [ "\${readLength}" = "nan"]
+    if [ "\${readLength}" = "nan" ]
     then
       readLength="NA"
     fi
     echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
 
+    # check for an incorrect number of fastqs
+    fastqCountError=false
+    fastqCountError_details=""
+    if [ "${fastqCount}" -gt "2" ]
+    then
+      fastqCountError=true
+      fastqCountError_details="**Too many fastqs detected (>2)**"
+    elif [ "\${endsMeta}" == "se" ] && [ "${fastqCount}" -ne "1" ]
+    then
+      fastqCountError=true
+      fastqCountError_details="**Number of fastqs detected does not match submitted endness**"
+    elif [ "\${endsMeta}" == "pe" ] && [ "${fastqCount}" -ne "2" ]
+    then
+      fastqCountError=true
+      fastqCountError_details="**Number of fastqs detected does not match submitted endness**"
+    fi
+
+    # check read counts match for fastqs
+    fastqReadError=false
+    fastqReadError_details=""
+    if [ "\${endsManual}" == "pe" ]
+    then
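+      # a fastq record spans 4 lines, so the line counts of R1 and R2 must be identical for intact paired-end data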
+      r1Count=\$(zcat ${fastq[0]} | wc -l)
+      r2Count=\$(zcat ${fastq[1]} | wc -l)
+      if [ "\${r1Count}" -ne "\${r2Count}" ]
+      then
+        fastqReadError=true
+        fastqReadError_details="**Number of reads does not match for R1 and R2:** there may be a truncation or mismatch of fastq files"
+      fi
+    fi
+
     # save design file
-    echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
+    echo "\${endsMeta},\${endsRaw},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
+
+    # save fastq error file
+    echo "\${fastqCountError},\${fastqCountError_details},\${fastqReadError},\${fastqReadError_details}" > fastqError.csv
     """
 }
 
 // Split metadata into separate channels
 endsMeta = Channel.create()
+endsRaw = Channel.create()
 endsManual = Channel.create()
 strandedMeta = Channel.create()
 spikeMeta = Channel.create()
@@ -338,6 +406,7 @@ expRID = Channel.create()
 studyRID = Channel.create()
 metadata_fl.splitCsv(sep: ",", header: false).separate(
   endsMeta,
+  endsRaw,
   endsManual,
   strandedMeta,
   spikeMeta,
@@ -348,12 +417,34 @@ metadata_fl.splitCsv(sep: ",", header: false).separate(
 )
 
 // Replicate metadata for multiple process inputs
+endsMeta.into {
+  endsMeta_checkMetadata
+  endsMeta_aggrQC
+  endsMeta_failExecutionRun
+}
 endsManual.into {
   endsManual_trimData
   endsManual_downsampleData
   endsManual_alignSampleData
   endsManual_aggrQC
 }
+strandedMeta.into {
+  strandedMeta_checkMetadata
+  strandedMeta_aggrQC
+  strandedMeta_failExecutionRun
+}
+spikeMeta.into {
+  spikeMeta_checkMetadata
+  spikeMeta_aggrQC
+  spikeMeta_failPreExecutionRun
+  spikeMeta_failExecutionRun
+}
+speciesMeta.into {
+  speciesMeta_checkMetadata
+  speciesMeta_aggrQC
+  speciesMeta_failPreExecutionRun
+  speciesMeta_failExecutionRun
+}
 studyRID.into {
   studyRID_aggrQC
   studyRID_uploadInputBag
@@ -365,6 +456,61 @@ expRID.into {
   expRID_uploadProcessedFile
 }
 
+// Split fastq count error into separate channel
+fastqCountError = Channel.create()
+fastqCountError_details = Channel.create()
+fastqReadError = Channel.create()
+fastqReadError_details = Channel.create()
+fastqError_fl.splitCsv(sep: ",", header: false).separate(
+  fastqCountError,
+  fastqCountError_details,
+  fastqReadError,
+  fastqReadError_details
+)
+
+//  Replicate errors for multiple process inputs
+fastqCountError.into {
+  fastqCountError_trimData
+  fastqCountError_getRefInfer
+  fastqCountError_downsampleData
+  fastqCountError_alignSampleData
+  fastqCountError_inferMetadata
+  fastqCountError_checkMetadata
+  fastqCountError_uploadExecutionRun
+  fastqCountError_getRef
+  fastqCountError_alignData
+  fastqCountError_dedupData
+  fastqCountError_makeBigWig
+  fastqCountError_countData
+  fastqCountError_fastqc
+  fastqCountError_dataQC
+  fastqCountError_aggrQC
+  fastqCountError_uploadQC
+  fastqCountError_uploadProcessedFile
+  fastqCountError_uploadOutputBag
+  fastqCountError_failPreExecutionRun
+}
+fastqReadError.into {
+  fastqReadError_trimData
+  fastqReadError_getRefInfer
+  fastqReadError_downsampleData
+  fastqReadError_alignSampleData
+  fastqReadError_inferMetadata
+  fastqReadError_checkMetadata
+  fastqReadError_uploadExecutionRun
+  fastqReadError_getRef
+  fastqReadError_alignData
+  fastqReadError_dedupData
+  fastqReadError_makeBigWig
+  fastqReadError_countData
+  fastqReadError_fastqc
+  fastqReadError_dataQC
+  fastqReadError_aggrQC
+  fastqReadError_uploadQC
+  fastqReadError_uploadProcessedFile
+  fastqReadError_uploadOutputBag
+  fastqReadError_failPreExecutionRun
+}
 
 /*
  * trimData: trims any adapter or non-host sequences from the data
@@ -375,6 +521,8 @@ process trimData {
   input:
     path (fastq) from fastqs_trimData
     val ends from endsManual_trimData
+    val fastqCountError_trimData
+    val fastqReadError_trimData
 
   output:
     path ("*.fq.gz") into fastqsTrim
@@ -382,6 +530,10 @@ process trimData {
     path ("*_trimming_report.txt") into trimQC
     path ("readLength.csv") into readLengthInfer_fl
 
+  when:
+    fastqCountError_trimData == "false" &&
+    fastqReadError_trimData == "false"
+
   script:
     """
     hostname > ${repRID}.trimData.log
@@ -402,7 +554,7 @@ process trimData {
     echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log
 
     # save read length file
-    echo -e "\${readLength}" > readLength.csv
+    echo "\${readLength}" > readLength.csv
     """
 }
 
@@ -412,7 +564,7 @@ readLengthInfer_fl.splitCsv(sep: ",", header: false).separate(
   readLengthInfer
 )
 
-// Replicate infered read length for multiple process inputs
+// Replicate inferred read length for multiple process inputs
 readLengthInfer.into {
   readLengthInfer_aggrQC
   readLengthInfer_uploadQC
@@ -424,7 +576,7 @@ fastqsTrim.into {
 }
 
 // Combine inputs of getRefInfer
-getRefInferInput = referenceInfer.combine(deriva_getRefInfer.combine(script_refDataInfer))
+getRefInferInput = referenceInfer.combine(deriva_getRefInfer.combine(script_refDataInfer.combine(fastqCountError_getRefInfer.combine(fastqReadError_getRefInfer))))
 
 /*
   * getRefInfer: downloads the appropriate reference for metadata inference
@@ -433,22 +585,26 @@ process getRefInfer {
   tag "${refName}"
 
   input:
-    tuple val (refName), path (credential, stageAs: "credential.json"), path (script_refDataInfer) from getRefInferInput
+    tuple val (refName), path (credential, stageAs: "credential.json"), path (script_refDataInfer), val (fastqCountError), val (fastqReadError) from getRefInferInput
 
   output:
     tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf")  into refInfer
     path ("${refName}", type: 'dir') into bedInfer
 
+  when:
+    fastqCountError == "false" &&
+    fastqReadError == "false"
+
   script:
     """
     hostname > ${repRID}.${refName}.getRefInfer.log
     ulimit -a >> ${repRID}.${refName}.getRefInfer.log
 
     # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.getRefInfer.log
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.${refName}.getRefInfer.log
     mkdir -p ~/.deriva
     ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.getRefInfer.log
+    echo -e "LOG: linked" >> ${repRID}.${refName}.getRefInfer.log
 
     # set the reference name
     if [ "${refName}" == "ERCC" ]
@@ -464,23 +620,14 @@ process getRefInfer {
       echo -e "LOG: ERROR - References could not be set!\nReference found: ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
       exit 1
     fi
-    mkdir ${refName}
 
     # retrieve the appropriate reference from the appropriate location
     echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
-    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
+    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
     then
-      ln -s "\${references}"/hisat2
-      ln -s "\${references}"/bed ${refName}/bed
-      ln -s "\${references}"/genome.fna
-      ln -s "\${references}"/genome.gtf
-    #elif [ ${referenceBase} == "s3://bicf-references" ]
-    #then
-    #  aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive
-    #  aws s3 cp "\${references}"/bed ./${refName}/bed --recursive
-    #  aws s3 cp "\${references}"/genome.fna ./
-    #  aws s3 cp "\${references}"/genome.gtf ./
-    elif [ ${referenceBase} == "dev.gudmap.org" ]
+      unzip \${references}.zip
+      mv \$(basename \${references})/data/* .
+    elif [ "${params.refSource}" == "datahub" ]
     then
       GRCv=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f1)
       GRCp=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f2)
@@ -502,19 +649,14 @@ process getRefInfer {
       unzip \$(basename \${refURL})
       mv \${fName}/data/* .
     fi
-    echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log
-
-    # make blank bed folder for ERCC
-    echo -e "LOG: making dummy bed folder for ERCC" >> ${repRID}.${refName}.getRefInfer.log
-    if [ "${refName}" == "ERCC" ]
-    then
-      rm -rf ${refName}/bed
-      mkdir ${refName}/bed
-      touch ${refName}/bed/temp
-    elif [ ${referenceBase} == "dev.gudmap.org" ]
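+    # flatten the unzipped reference bag: gtf and fna into the work dir, genome.bed into ${refName}/ for non-ERCC references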
+    mv ./annotation/genome.gtf .
+    mv ./sequence/genome.fna .
+    mkdir ${refName}
+    if [ "${refName}" != "ERCC" ]
     then
-      mv bed ${refName}/
+      mv ./annotation/genome.bed ./${refName}
     fi
+    echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log
     """
 }
 
@@ -527,11 +669,17 @@ process downsampleData {
   input:
     path fastq from fastqsTrim_downsampleData
     val ends from endsManual_downsampleData
+    val fastqCountError_downsampleData
+    val fastqReadError_downsampleData
 
   output:
     path ("sampled.1.fq") into fastqs1Sample
     path ("sampled.2.fq") into fastqs2Sample
 
+  when:
+    fastqCountError_downsampleData == "false" &&
+    fastqReadError_downsampleData == "false"
+
   script:
     """
     hostname > ${repRID}.downsampleData.log
@@ -554,7 +702,7 @@ process downsampleData {
 }
 
 // Replicate the downsampled fastqs and attach them to the references
-inferInput = endsManual_alignSampleData.combine(refInfer.combine(fastqs1Sample.collect().combine(fastqs2Sample.collect())))
+inferInput = endsManual_alignSampleData.combine(refInfer.combine(fastqs1Sample.collect().combine(fastqs2Sample.collect().combine(fastqCountError_alignSampleData.combine(fastqReadError_alignSampleData)))))
 
 /*
  * alignSampleData: aligns the downsampled reads to a reference database
@@ -563,13 +711,17 @@ process alignSampleData {
   tag "${ref}"
 
   input:
-    tuple val (ends), val (ref), path (hisat2), path (fna), path (gtf), path (fastq1), path (fastq2) from inferInput
+    tuple val (ends), val (ref), path (hisat2), path (fna), path (gtf), path (fastq1), path (fastq2), val (fastqCountError), val (fastqReadError) from inferInput
 
   output:
     path ("${ref}.sampled.sorted.bam") into sampleBam
     path ("${ref}.sampled.sorted.bam.bai") into sampleBai
     path ("${ref}.alignSampleSummary.txt") into alignSampleQC
 
+  when:
+    fastqCountError == "false" &&
+    fastqReadError == "false"
+
   script:
     """
     hostname > ${repRID}.${ref}.alignSampleData.log
@@ -615,10 +767,17 @@ process inferMetadata {
     path bam from sampleBam.collect()
     path bai from sampleBai.collect()
     path alignSummary from alignSampleQC_inferMetadata.collect()
+    val fastqCountError_inferMetadata
+    val fastqReadError_inferMetadata
 
   output:
     path "infer.csv" into inferMetadata_fl
     path "${repRID}.infer_experiment.txt" into inferExperiment
+    path "speciesError.csv" into speciesError_fl
+
+  when:
+    fastqCountError_inferMetadata == "false" &&
+    fastqReadError_inferMetadata == "false"
 
   script:
     """
@@ -645,73 +804,93 @@ process inferMetadata {
     fi
     echo -e "LOG: inference of strandedness results is: \${spike}" >> ${repRID}.inferMetadata.log
 
+    speciesError=false
+    speciesError_details=""
     # determine species
     if [ 1 -eq \$(echo \$(expr \${align_hu} ">=" 40)) ] && [ 1 -eq \$(echo \$(expr \${align_mo} "<" 40)) ]
     then
       species="Homo sapiens"
       bam="GRCh.sampled.sorted.bam"
-      bed="./GRCh/bed/genome.bed"
+      bed="./GRCh/genome.bed"
+      echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
     elif [ 1 -eq \$(echo \$(expr \${align_mo} ">=" 40)) ] && [ 1 -eq \$(echo \$(expr \${align_hu} "<" 40)) ]
     then
       species="Mus musculus"
       bam="GRCm.sampled.sorted.bam"
-      bed="./GRCm/bed/genome.bed"
+      bed="./GRCm/genome.bed"
+      echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
     else
       echo -e "LOG: ERROR - inference of species returns an ambiguous result: hu=\${align_hu} mo=\${align_mo}" >> ${repRID}.inferMetadata.log
       if [ "${speciesForce}" == "" ]
       then
-        exit 1
+        speciesError=true
+        speciesError_details="**Inference of species returns an ambiguous result:** Percent aligned to human = \${align_hu} and percent aligned to mouse = \${align_mo}"
       fi
     fi
     if [ "${speciesForce}" != "" ]
     then
+      speciesError=false
       echo -e "LOG: species overridden to: ${speciesForce}"
       species="${speciesForce}"
       if [ "${speciesForce}" == "Homo sapiens" ]
       then
         bam="GRCh.sampled.sorted.bam"
-        bed="./GRCh/bed/genome.bed"
+        bed="./GRCh/genome.bed"
       elif [ "${speciesForce}" == "Mus musculus" ]
       then
         bam="GRCm.sampled.sorted.bam"
-        bed="./GRCm/bed/genome.bed"
+        bed="./GRCm/genome.bed"
       fi
     fi
-    echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
 
-    # infer experimental setting from dedup bam
-    echo -e "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.log
-    infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.infer_experiment.txt
-    echo -e "LOG: infered" >> ${repRID}.inferMetadata.log
-
-    ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
-    fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
-    if [ \${ended} == "PairEnd" ]
-    then
-      ends="pe"
-      percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
-      percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
-    elif [ \${ended} == "SingleEnd" ]
+    if [ "\${speciesError}" == false ]
     then
-      ends="se"
-      percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
-      percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
-    fi
-    echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
-    echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
-    if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
-    then
-      stranded="forward"
-    elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
-    then
-      stranded="reverse"
+      # infer experimental setting from dedup bam
+      echo -e "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.log
+      infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.infer_experiment.txt
+      echo -e "LOG: inferred" >> ${repRID}.inferMetadata.log
+
+      ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
+      fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
+      if [ \${ended} == "PairEnd" ]
+      then
+        ends="pe"
+        percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
+        percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
+      elif [ \${ended} == "SingleEnd" ]
+      then
+        ends="se"
+        percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
+        percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
+      fi
+      echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
+      echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
+      if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
+      then
+        stranded="forward"
+      elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
+      then
+        stranded="reverse"
+      else
+        stranded="unstranded"
+      fi
+      echo -e "LOG: stradedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
     else
-      stranded="unstranded"
+      ends=""
+      stranded=""
+      spike=""
+      species=""
+      percentF=""
+      percentR=""
+      fail=""
+      touch ${repRID}.infer_experiment.txt
     fi
-    echo -e "LOG: stradedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
 
-    # write infered metadata to file
-    echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" 1>> infer.csv
+    # write inferred metadata to file
+    echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" > infer.csv
+
+    # save species error file
+    echo "\${speciesError},\${speciesError_details}" > speciesError.csv
     """
 }
 
@@ -741,30 +920,346 @@ inferMetadata_fl.splitCsv(sep: ",", header: false).separate(
 
 // Replicate metadata for multiple process inputs
 endsInfer.into {
+  endsInfer_checkMetadata
   endsInfer_alignData
   endsInfer_countData
   endsInfer_dataQC
   endsInfer_aggrQC
   endsInfer_uploadQC
+  endsInfer_failExecutionRun
 }
 strandedInfer.into {
+  strandedInfer_checkMetadata
   strandedInfer_alignData
   strandedInfer_countData
   strandedInfer_aggrQC
   strandedInfer_uploadQC
+  strandedInfer_failExecutionRun
 }
 spikeInfer.into{
+  spikeInfer_checkMetadata
   spikeInfer_getRef
   spikeInfer_aggrQC
   spikeInfer_uploadExecutionRun
+  spikeInfer_failExecutionRun
 }
 speciesInfer.into {
+  speciesInfer_checkMetadata
   speciesInfer_getRef
   speciesInfer_aggrQC
   speciesInfer_uploadExecutionRun
   speciesInfer_uploadProcessedFile
+  speciesInfer_failExecutionRun
+}
+
+// Split species count error into separate channel
+speciesError = Channel.create()
+speciesError_details = Channel.create()
+speciesError_fl.splitCsv(sep: ",", header: false).separate(
+  speciesError,
+  speciesError_details
+)
+
+//  Replicate errors for multiple process inputs
+speciesError.into {
+  speciesError_checkMetadata
+  speciesError_uploadExecutionRun
+  speciesError_getRef
+  speciesError_alignData
+  speciesError_dedupData
+  speciesError_makeBigWig
+  speciesError_countData
+  speciesError_fastqc
+  speciesError_dataQC
+  speciesError_aggrQC
+  speciesError_uploadQC
+  speciesError_uploadProcessedFile
+  speciesError_uploadOutputBag
+  speciesError_failPreExecutionRun
+}
+
+/* 
+ * checkMetadata: checks the submitted metadata against the inferred metadata
+*/
+process checkMetadata {
+  tag "${repRID}"
+
+  input:
+    val endsMeta from endsMeta_checkMetadata
+    val strandedMeta from strandedMeta_checkMetadata
+    val spikeMeta from spikeMeta_checkMetadata
+    val speciesMeta from speciesMeta_checkMetadata
+    val endsInfer from endsInfer_checkMetadata
+    val strandedInfer from strandedInfer_checkMetadata
+    val spikeInfer from spikeInfer_checkMetadata
+    val speciesInfer from speciesInfer_checkMetadata
+    val fastqCountError_checkMetadata
+    val fastqReadError_checkMetadata
+    val speciesError_checkMetadata
+
+  output:
+    path ("check.csv") into checkMetadata_fl
+    path ("outputBagRID.csv") optional true into outputBagRID_fl_dummy
+
+  when:
+    fastqCountError_checkMetadata == "false" &&
+    fastqReadError_checkMetadata == "false" &&
+    speciesError_checkMetadata == "false"
+
+  script:
+    """
+    hostname > ${repRID}.checkMetadata.log
+    ulimit -a >> ${repRID}.checkMetadata.log
+
+    pipelineError=false
+    # check if submitted metadata matches inferred
+    if [ "${endsMeta}" != "${endsInfer}" ]
+    then
+      pipelineError=true
+      pipelineError_ends=true
+      echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
+    else
+      pipelineError_ends=false
+      echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
+    fi
+    if [ "${strandedMeta}" != "${strandedInfer}" ]
+    then
+      pipelineError=true
+      pipelineError_stranded=true
+      if [ "${strandedMeta}" == "stranded" ]
+      then
+        if [[ "${strandedInfer}" == "forward" ]] || [[ "${strandedInfer}" == "reverse" ]]
+        then
+          pipelineError=false
+          pipelineError_stranded=false
+          echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
+        else
+          echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
+        fi
+      else
+        echo -e "LOG: stranded does not match: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
+      fi
+    else
+      pipelineError=false
+      pipelineError_stranded=false
+      echo -e "LOG: stranded matches: Submitted=${strandedMeta}; Inferred=${strandedInfer}" >> ${repRID}.checkMetadata.log
+    fi
+    if [ "${spikeMeta}" != "${spikeInfer}" ]
+    then
+      pipelineError=true
+      pipelineError_spike=true
+      echo -e "LOG: spike does not match: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
+    else
+      pipelineError_spike=false
+      echo -e "LOG: stranded matches: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
+    fi
+    if [ "${speciesMeta}" != "${speciesInfer}" ]
+    then
+      pipelineError=true
+      pipelineError_species=true
+      echo -e "LOG: species does not match: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
+    else
+      pipelineError_species=false
+      echo -e "LOG: species matches: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
+    fi
+
+    # create dummy output bag rid if failure
+    if [ \${pipelineError} == true ]
+    then
+      echo "fail" > outputBagRID.csv
+    fi
+
+    # write checks to file
+    echo "\${pipelineError},\${pipelineError_ends},\${pipelineError_stranded},\${pipelineError_spike},\${pipelineError_species}" > check.csv
+    """
+}
+
+// Split errors into separate channels
+pipelineError = Channel.create()
+pipelineError_ends = Channel.create()
+pipelineError_stranded = Channel.create()
+pipelineError_spike = Channel.create()
+pipelineError_species = Channel.create()
+checkMetadata_fl.splitCsv(sep: ",", header: false).separate(
+  pipelineError,
+  pipelineError_ends,
+  pipelineError_stranded,
+  pipelineError_spike,
+  pipelineError_species
+)
+
+// Replicate errors for multiple process inputs
+pipelineError.into {
+  pipelineError_getRef
+  pipelineError_alignData
+  pipelineError_dedupData
+  pipelineError_makeBigWig
+  pipelineError_countData
+  pipelineError_fastqc
+  pipelineError_dataQC
+  pipelineError_aggrQC
+  pipelineError_uploadQC
+  pipelineError_uploadProcessedFile
+  pipelineError_uploadOutputBag
+  pipelineError_failExecutionRun
+}
+
+/* 
+ * uploadInputBag: uploads the input bag
+*/
+process uploadInputBag {
+  tag "${repRID}"
+
+  input:
+    path script_uploadInputBag
+    path credential, stageAs: "credential.json" from deriva_uploadInputBag
+    path inputBag from inputBag_uploadInputBag
+    val studyRID from studyRID_uploadInputBag
+
+  output:
+    path ("inputBagRID.csv") into inputBagRID_fl
+
+  when:
+    upload
+
+  script:
+  """
+  hostname > ${repRID}.uploadInputBag.log
+  ulimit -a >> ${repRID}.uploadInputBag.log
+
+  yr=\$(date +'%Y')
+  mn=\$(date +'%m')
+  dy=\$(date +'%d')
+
+  file=\$(basename -a ${inputBag})
+  md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
+  echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
+  size=\$(wc -c < ./\${file})
+  echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log
+  
+  exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
+  if [ "\${exist}" == "[]" ]
+  then
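+      # extract the auth cookie for ${source} from credential.json: strip the 11-char '"cookie": "' prefix and the trailing quote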
+      cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
+      cookie=\${cookie:11:-1}
+
+      loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
+      inputBag_rid=\$(python3 ${script_uploadInputBag} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
+      echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
+      rid=\${inputBag_rid}
+  else
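+      # slice the bare RID out of the returned JSON: drop the 7-char '"RID":"' prefix and 6-char '","RCT' suffix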
+      exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+      exist=\${exist:7:-6}
+      echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
+      rid=\${exist}
+  fi
+
+  echo "\${rid}" > inputBagRID.csv
+  """
 }
 
+// Extract input bag RID into channel
+inputBagRID = Channel.create()
+inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
+  inputBagRID
+)
+
+// Replicate input bag RID for multiple process inputs
+inputBagRID.into {
+  inputBagRID_uploadExecutionRun
+  inputBagRID_finalizeExecutionRun
+  inputBagRID_failPreExecutionRun
+  inputBagRID_failExecutionRun
+}
+
+/* 
+ * uploadExecutionRun: uploads the execution run
+*/
+process uploadExecutionRun {
+  tag "${repRID}"
+
+  input:
+    path script_uploadExecutionRun_uploadExecutionRun
+    path credential, stageAs: "credential.json" from deriva_uploadExecutionRun
+    val spike from spikeInfer_uploadExecutionRun
+    val species from speciesInfer_uploadExecutionRun
+    val inputBagRID from inputBagRID_uploadExecutionRun
+    val fastqCountError_uploadExecutionRun
+    val fastqReadError_uploadExecutionRun
+    val speciesError_uploadExecutionRun
+    
+  output:
+    path ("executionRunRID.csv") into executionRunRID_fl
+
+  when:
+    upload &&
+    fastqCountError_uploadExecutionRun == "false" &&
+    fastqReadError_uploadExecutionRun == "false" &&
+    speciesError_uploadExecutionRun == "false"
+
+  script:
+  """
+  hostname > ${repRID}.uploadExecutionRun.log
+  ulimit -a >> ${repRID}.uploadExecutionRun.log
+
+  echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
+  workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
+  workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+  workflow=\${workflow:7:-6}
+  echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.uploadExecutionRun.log
+
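+  # derive the reference genome name from the inferred species, the pinned reference version, and spike-in status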
+  if [ "${species}" == "Homo sapiens" ]
+  then
+    genomeName=\$(echo GRCh${refHuVersion})
+  elif [ "${species}" == "Mus musculus" ]
+  then
+    genomeName=\$(echo GRCm${refMoVersion})
+  fi
+  if [ "${spike}" == "yes" ]
+  then
+    genomeName=\$(echo \${genomeName}-S)
+  fi
+  echo LOG: searching for genome name - \${genomeName} >> ${repRID}.uploadExecutionRun.log
+  genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
+  genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+  genome=\${genome:7:-6}
+  echo LOG: genome RID extracted - \${genome} >> ${repRID}.uploadExecutionRun.log
+
+  cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
+  cookie=\${cookie:11:-1}
+
+  exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
+  echo \${exist} >> ${repRID}.uploadExecutionRun.log
+  if [ "\${exist}" == "[]" ]
+  then
+    executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u F)
+    echo LOG: execution run RID uploaded - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
+  else
+    rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+    rid=\${rid:7:-6}
+    echo \${rid} >> ${repRID}.uploadExecutionRun.log
+    executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u \${rid})
+    echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
+  fi
+
+  echo "\${executionRun_rid}" > executionRunRID.csv
+  """
+}
+
+// Extract execution run RID into channel
+executionRunRID = Channel.create()
+executionRunRID_fl.splitCsv(sep: ",", header: false).separate(
+  executionRunRID
+)
+
+// Replicate execution run RID for multiple process inputs
+executionRunRID.into {
+  executionRunRID_uploadQC
+  executionRunRID_uploadProcessedFile
+  executionRunRID_uploadOutputBag
+  executionRunRID_finalizeExecutionRun
+  executionRunRID_failExecutionRun
+}
 
 /*
   * getRef: downloads appropriate reference
@@ -777,9 +1272,19 @@ process getRef {
     path credential, stageAs: "credential.json" from deriva_getRef
     val spike from spikeInfer_getRef
     val species from speciesInfer_getRef
+    val fastqCountError_getRef
+    val fastqReadError_getRef
+    val speciesError_getRef
+    val pipelineError_getRef
 
   output:
-    tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv")  into reference
+    tuple path ("hisat2", type: 'dir'), path ("*.bed"), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv")  into reference
+
+  when:
+    fastqCountError_getRef == "false" &&
+    fastqReadError_getRef == "false" &&
+    speciesError_getRef == "false" &&
+    pipelineError_getRef == "false"
 
   script:
     """
@@ -807,34 +1312,21 @@ process getRef {
     fi
     if [ "${spike}" == "yes" ]
     then
-      references=\$(echo \${reference}-S/)
+      references=\$(echo \${references}-S)
     elif [ "${spike}" == "no" ]
     then
-      reference=\$(echo \${references}/)
+      references=\$(echo \${references})
     fi
     echo -e "LOG: species set to \${references}" >> ${repRID}.getRef.log
 
     # retrieve the appropriate reference from the appropriate location
     echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log
-    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
+    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
     then
       echo -e "LOG: grabbing reference files from local (BioHPC)" >> ${repRID}.getRef.log
-      ln -s "\${references}"/hisat2
-      ln -s "\${references}"/bed
-      ln -s "\${references}"/genome.fna
-      ln -s "\${references}"/genome.gtf
-      ln -s "\${references}"/geneID.tsv
-      ln -s "\${references}"/Entrez.tsv
-    #elif [ ${referenceBase} == "s3://bicf-references" ]
-    #then
-    #  echo -e "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log
-    #  aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive
-    #  aws s3 cp "\${references}"/bed ./bed --recursive
-    #  aws s3 cp "\${references}"/genome.fna ./
-    #  aws s3 cp "\${references}"/genome.gtf ./
-    #  aws s3 cp "\${references}"/geneID.tsv ./
-    #  aws s3 cp "\${references}"/Entrez.tsv ./
-    elif [ ${referenceBase} == "dev.gudmap.org" ]
+      unzip \${references}.zip
+      mv \$(basename \${references})/data/* .
+    elif [ "${params.refSource}" == "datahub" ]
     then
       echo -e "LOG: grabbing reference files from datahub" >> ${repRID}.getRef.log
       GRCv=\$(echo \${references} | grep -o \${refName}.* | cut -d '.' -f1)
@@ -853,6 +1345,12 @@ process getRef {
       mv \${fName}/data/* .
     fi
     echo -e "LOG: fetched" >> ${repRID}.getRef.log
+
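+    # flatten the unzipped reference bag so downstream steps find genome.*, Entrez.tsv, and geneID.tsv in the work dir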
+    mv ./annotation/genome.gtf .
+    mv ./sequence/genome.fna .
+    mv ./annotation/genome.bed .
+    mv ./metadata/Entrez.tsv .
+    mv ./metadata/geneID.tsv .
     """
 }
 
@@ -874,11 +1372,21 @@ process alignData {
     path reference_alignData
     val ends from endsInfer_alignData
     val stranded from strandedInfer_alignData
+    val fastqCountError_alignData
+    val fastqReadError_alignData
+    val speciesError_alignData
+    val pipelineError_alignData
 
   output:
     tuple path ("${repRID}.sorted.bam"), path ("${repRID}.sorted.bam.bai") into rawBam
     path ("*.alignSummary.txt") into alignQC
 
+  when:
+    fastqCountError_alignData == "false" &&
+    fastqReadError_alignData == "false" &&
+    speciesError_alignData == "false" &&
+    pipelineError_alignData == "false"
+
   script:
     """
     hostname > ${repRID}.align.log
@@ -928,7 +1436,7 @@ process alignData {
 }
 
 // Replicate rawBam for multiple process inputs
-rawBam.into {
+rawBam.set {
   rawBam_dedupData
 }
 
@@ -941,12 +1449,22 @@ process dedupData {
 
   input:
     tuple path (bam), path (bai) from rawBam_dedupData
+    val fastqCountError_dedupData
+    val fastqReadError_dedupData
+    val speciesError_dedupData
+    val pipelineError_dedupData
 
   output:
     tuple path ("${repRID}_sorted.deduped.bam"), path ("${repRID}_sorted.deduped.bam.bai") into dedupBam
     tuple path ("${repRID}_sorted.deduped.*.bam"), path ("${repRID}_sorted.deduped.*.bam.bai") into dedupChrBam
     path ("*.deduped.Metrics.txt") into dedupQC
 
+  when:
+    fastqCountError_dedupData == 'false' &&
+    fastqReadError_dedupData == 'false' &&
+    speciesError_dedupData == 'false' &&
+    pipelineError_dedupData == 'false'
+
   script:
     """
     hostname > ${repRID}.dedup.log
@@ -966,7 +1484,7 @@ process dedupData {
     samtools index -@ `nproc` -b ${repRID}_sorted.deduped.bam ${repRID}_sorted.deduped.bam.bai
 
     # split the deduped BAM file for multi-threaded tin calculation
-    for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | sort | uniq`;
+    for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | grep -o chr.[0-9]* | sort | uniq`;
       do
       echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}_sorted.deduped.bam \${i} 1>> ${repRID}_sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}_sorted.deduped.\${i}.bam ${repRID}_sorted.deduped.\${i}.bam.bai"
     done | parallel -j `nproc` -k
@@ -990,10 +1508,20 @@ process makeBigWig {
 
   input:
     tuple path (bam), path (bai) from dedupBam_makeBigWig
+    val fastqCountError_makeBigWig
+    val fastqReadError_makeBigWig
+    val speciesError_makeBigWig
+    val pipelineError_makeBigWig
 
   output:
     path ("${repRID}_sorted.deduped.bw") into bigwig
 
+  when:
+    fastqCountError_makeBigWig == 'false' &&
+    fastqReadError_makeBigWig == 'false' &&
+    speciesError_makeBigWig == 'false' &&
+    pipelineError_makeBigWig == 'false'
+
   script:
     """
     hostname > ${repRID}.makeBigWig.log
@@ -1020,12 +1548,22 @@ process countData {
     path ref from reference_countData
     val ends from endsInfer_countData
     val stranded from strandedInfer_countData
+    val fastqCountError_countData
+    val fastqReadError_countData
+    val speciesError_countData
+    val pipelineError_countData
 
   output:
     path ("*_tpmTable.csv") into counts
     path ("*_countData.summary") into countsQC
     path ("assignedReads.csv") into assignedReadsInfer_fl
 
+  when:
+    fastqCountError_countData == 'false' &&
+    fastqReadError_countData == 'false' &&
+    speciesError_countData == 'false' &&
+    pipelineError_countData == 'false'
+
   script:
     """
     hostname > ${repRID}.countData.log
@@ -1077,7 +1615,7 @@ assignedReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
   assignedReadsInfer
 )
 
-// Replicate infered assigned reads for multiple process inputs
+// Replicate inferred assigned reads for multiple process inputs
 assignedReadsInfer.into {
   assignedReadsInfer_aggrQC
   assignedReadsInfer_uploadQC
@@ -1091,11 +1629,21 @@ process fastqc {
 
   input:
     path (fastq) from fastqs_fastqc
+    val fastqCountError_fastqc
+    val fastqReadError_fastqc
+    val speciesError_fastqc
+    val pipelineError_fastqc
 
   output:
     path ("*_fastqc.zip") into fastqc
     path ("rawReads.csv") into rawReadsInfer_fl
 
+  when:
+    fastqCountError_fastqc == 'false' &&
+    fastqReadError_fastqc == 'false' &&
+    speciesError_fastqc == 'false' &&
+    pipelineError_fastqc == 'false'
+
   script:
     """
     hostname > ${repRID}.fastqc.log
@@ -1116,7 +1664,7 @@ rawReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
   rawReadsInfer
 )
 
-// Replicate infered raw reads for multiple process inputs
+// Replicate inferred raw reads for multiple process inputs
 rawReadsInfer.into {
   rawReadsInfer_aggrQC
   rawReadsInfer_uploadQC
@@ -1134,12 +1682,22 @@ process dataQC {
     tuple path (bam), path (bai) from dedupBam_dataQC
     tuple path (chrBam), path (chrBai) from dedupChrBam
     val ends from endsInfer_dataQC
+    val fastqCountError_dataQC
+    val fastqReadError_dataQC
+    val speciesError_dataQC
+    val pipelineError_dataQC
 
   output:
     path "${repRID}_tin.hist.tsv" into tinHist
     path "${repRID}_tin.med.csv" into  tinMedInfer_fl
     path "${repRID}_insertSize.inner_distance_freq.txt" into innerDistance
 
+  when:
+    fastqCountError_dataQC == 'false' &&
+    fastqReadError_dataQC == 'false' &&
+    speciesError_dataQC == 'false' &&
+    pipelineError_dataQC == 'false'
+
   script:
     """
     hostname > ${repRID}.dataQC.log
@@ -1147,8 +1705,8 @@ process dataQC {
 
     # calculate TIN values per feature on each chromosome
     echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}_sorted.deduped.tin.xls
-    for i in `cat ./bed/genome.bed | cut -f1 | sort | uniq`; do
-      echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}_sorted.deduped.\${i}.bam  -r ./bed/genome.bed; cat ${repRID}_sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
+    for i in `cat ./genome.bed | cut -f1 | grep -o chr.[0-9]* | sort | uniq`; do
+      echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}_sorted.deduped.\${i}.bam  -r ./genome.bed; cat ${repRID}_sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
     done | parallel -j `nproc` -k 1>> ${repRID}_sorted.deduped.tin.xls
 
     # bin TIN values
@@ -1160,7 +1718,7 @@ process dataQC {
     if [ "${ends}" == "pe" ]
     then
       echo -e "LOG: calculating inner distances for ${ends}" >> ${repRID}.dataQC.log
-      inner_distance.py -i "${bam}" -o ${repRID}_insertSize -r ./bed/genome.bed
+      inner_distance.py -i "${bam}" -o ${repRID}_insertSize -r ./genome.bed
       echo -e "LOG: calculated" >> ${repRID}.dataQC.log
     elif [ "${ends}" == "se" ]
     then
@@ -1199,10 +1757,10 @@ process aggrQC {
     path alignSampleQCs from alignSampleQC_aggrQC.collect()
     path inferExperiment
     val endsManual from endsManual_aggrQC
-    val endsM from endsMeta
-    val strandedM from strandedMeta
-    val spikeM from spikeMeta
-    val speciesM from speciesMeta
+    val endsM from endsMeta_aggrQC
+    val strandedM from strandedMeta_aggrQC
+    val spikeM from spikeMeta_aggrQC
+    val speciesM from speciesMeta_aggrQC
     val endsI from endsInfer_aggrQC
     val strandedI from strandedInfer_aggrQC
     val spikeI from spikeInfer_aggrQC
@@ -1214,11 +1772,21 @@ process aggrQC {
     val tinMedI from tinMedInfer
     val studyRID from studyRID_aggrQC
     val expRID from expRID_aggrQC
+    val fastqCountError_aggrQC
+    val fastqReadError_aggrQC
+    val speciesError_aggrQC
+    val pipelineError_aggrQC
 
   output:
     path "${repRID}.multiqc.html" into multiqc
     path "${repRID}.multiqc_data.json" into multiqcJSON
 
+  when:
+    fastqCountError_aggrQC == 'false' &&
+    fastqReadError_aggrQC == 'false' &&
+    speciesError_aggrQC == 'false' &&
+    pipelineError_aggrQC == 'false'
+
   script:
     """
     hostname > ${repRID}.aggrQC.log
@@ -1259,9 +1827,9 @@ process aggrQC {
     echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t'${readLengthM}'\t-" >> metadata.tsv
     if [ "${params.speciesForce}" == "" ]
     then
-      echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+      echo -e "Inferred\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
     else
-      echo -e "Infered\t${speciesI} (FORCED)\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+      echo -e "Inferred\t${speciesI} (FORCED)\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
     fi
     echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv
 
@@ -1285,147 +1853,6 @@ process aggrQC {
     """
 }
 
-/* 
- * uploadInputBag: uploads the input bag
-*/
-process uploadInputBag {
-  tag "${repRID}"
-
-  input:
-    path script_uploadInputBag
-    path credential, stageAs: "credential.json" from deriva_uploadInputBag
-    path inputBag from inputBag_uploadInputBag
-    val studyRID from studyRID_uploadInputBag
-
-  output:
-    path ("inputBagRID.csv") into inputBagRID_fl
-
-  when:
-    upload
-
-  script:
-  """
-  hostname > ${repRID}.uploadInputBag.log
-  ulimit -a >> ${repRID}.uploadInputBag.log
-
-  yr=\$(date +'%Y')
-  mn=\$(date +'%m')
-  dy=\$(date +'%d')
-
-  file=\$(basename -a ${inputBag})
-  md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
-  echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
-  size=\$(wc -c < ./\${file})
-  echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log
-  
-  exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
-  if [ "\${exist}" == "[]" ]
-  then
-      cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-      cookie=\${cookie:11:-1}
-
-      loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
-      inputBag_rid=\$(python3 ${script_uploadInputBag} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
-      echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
-      rid=\${inputBag_rid}
-  else
-      exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-      exist=\${exist:7:-6}
-      echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
-      rid=\${exist}
-  fi
-
-  echo \${rid} > inputBagRID.csv
-  """
-}
-
-// Extract input bag RID into channel
-inputBagRID = Channel.create()
-inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
-  inputBagRID
-)
-
-/* 
- * uploadExecutionRun: uploads the execution run
-*/
-process uploadExecutionRun {
-  tag "${repRID}"
-
-  input:
-    path script_uploadExecutionRun
-    path credential, stageAs: "credential.json" from deriva_uploadExecutionRun
-    val spike from spikeInfer_uploadExecutionRun
-    val species from speciesInfer_uploadExecutionRun
-    val inputBagRID
-    
-  output:
-    path ("executionRunRID.csv") into executionRunRID_fl
-
-  when:
-    upload
-
-  script:
-  """
-  hostname > ${repRID}.uploadExecutionRun.log
-  ulimit -a >> ${repRID}.uploadExecutionRun.log
-
-  echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
-  workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
-  workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-  workflow=\${workflow:7:-6}
-  echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.uploadExecutionRun.log
-
-  if [ "${species}" == "Homo sapiens" ]
-  then
-    genomeName=\$(echo GRCh${refHuVersion})
-  elif [ "${species}" == "Mus musculus" ]
-  then
-    genomeName=\$(echo GRCm${refMoVersion})
-  fi
-  if [ "${spike}" == "yes" ]
-  then
-    genomeName=\$(echo \${genomeName}-S)
-  fi
-  echo LOG: searching for genome name - \${genomeName} >> ${repRID}.uploadExecutionRun.log
-  genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName}_indev)
-  genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-  genome=\${genome:7:-6}
-  echo LOG: genome RID extracted - \${genome} >> ${repRID}.uploadExecutionRun.log
-
-  cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-  cookie=\${cookie:11:-1}
-
-  exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
-  echo \${exist} >> ${repRID}.uploadExecutionRun.log
-  if [ "\${exist}" == "[]" ]
-  then
-    executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u F)
-    echo LOG: execution run RID uploaded - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
-  else
-    rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    rid=\${rid:7:-6}
-    echo \${rid} >> ${repRID}.uploadExecutionRun.log
-    executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u \${rid})
-    echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
-  fi
-
-  echo \${executionRun_rid} > executionRunRID.csv
-  """
-}
-
-// Extract execution run RID into channel
-executionRunRID = Channel.create()
-executionRunRID_fl.splitCsv(sep: ",", header: false).separate(
-  executionRunRID
-)
-
-//
-executionRunRID.into {
-  executionRunRID_uploadQC
-  executionRunRID_uploadProcessedFile
-  executionRunRID_uploadOutputBag
-}
-
 /* 
  * uploadQC: uploads the mRNA QC
 */
@@ -1442,13 +1869,20 @@ process uploadQC {
     val length from readLengthInfer_uploadQC
     val rawCount from rawReadsInfer_uploadQC
     val finalCount from assignedReadsInfer_uploadQC
-    
-    
+    val fastqCountError_uploadQC
+    val fastqReadError_uploadQC
+    val speciesError_uploadQC
+    val pipelineError_uploadQC
+
   output:
     path ("qcRID.csv") into qcRID_fl
 
   when:
-    upload
+    upload &&
+    fastqCountError_uploadQC == 'false' &&
+    fastqReadError_uploadQC == 'false' &&
+    speciesError_uploadQC == 'false' &&
+    pipelineError_uploadQC == 'false'
 
   script:
   """
@@ -1481,7 +1915,7 @@ process uploadQC {
   qc_rid=\$(python3 ${script_uploadQC} -r ${repRID} -e ${executionRunRID} -p "\${end}" -s ${stranded} -l ${length} -w ${rawCount} -f ${finalCount} -o ${source} -c \${cookie} -u F)
   echo LOG: mRNA QC RID uploaded - \${qc_rid} >> ${repRID}.uploadQC.log
 
-  echo \${qc_rid} > qcRID.csv
+  echo "\${qc_rid}" > qcRID.csv
   """
 }
 
@@ -1511,12 +1945,20 @@ process uploadProcessedFile {
     val studyRID from studyRID_uploadProcessedFile
     val expRID from expRID_uploadProcessedFile
     val executionRunRID from executionRunRID_uploadProcessedFile
+    val fastqCountError_uploadProcessedFile
+    val fastqReadError_uploadProcessedFile
+    val speciesError_uploadProcessedFile
+    val pipelineError_uploadProcessedFile
 
   output:
     path ("${repRID}_Output_Bag.zip") into outputBag
 
   when:
-    upload
+    upload &&
+    fastqCountError_uploadProcessedFile == 'false' &&
+    fastqReadError_uploadProcessedFile == 'false' &&
+    speciesError_uploadProcessedFile == 'false' &&
+    pipelineError_uploadProcessedFile == 'false'
 
   script:
   """
@@ -1595,12 +2037,20 @@ process uploadOutputBag {
     path outputBag
     val studyRID from studyRID_uploadOutputBag
     val executionRunRID from executionRunRID_uploadOutputBag
+    val fastqCountError_uploadOutputBag
+    val fastqReadError_uploadOutputBag
+    val speciesError_uploadOutputBag
+    val pipelineError_uploadOutputBag
 
   output:
     path ("outputBagRID.csv") into outputBagRID_fl
 
   when:
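+    // skip the output bag upload if any upstream error flag is set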
-    upload
+    upload &&
+    fastqCountError_uploadOutputBag == 'false' &&
+    fastqReadError_uploadOutputBag == 'false' &&
+    speciesError_uploadOutputBag == 'false' &&
+    pipelineError_uploadOutputBag == 'false'
 
   script:
   """
@@ -1616,11 +2066,11 @@ process uploadOutputBag {
   echo LOG: ${repRID} output bag md5 sum - \${md5} >> ${repRID}.uploadOutputBag.log
   size=\$(wc -c < ./\${file})
   echo LOG: ${repRID} output bag size - \${size} bytes >> ${repRID}.uploadOutputBag.log
   
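+  # reuse the existing output bag record when one with the same MD5 is already registered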
   exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_MD5=\${md5})
   if [ "\${exist}" == "[]" ]
   then
       cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
       cookie=\${cookie:11:-1}
 
       loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/output_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
@@ -1634,7 +2084,7 @@ process uploadOutputBag {
       rid=\${exist}
   fi
 
-  echo \${rid} > outputBagRID.csv
+  echo "\${rid}" > outputBagRID.csv
   """
 }
 
@@ -1644,6 +2094,204 @@ outputBagRID_fl.splitCsv(sep: ",", header: false).separate(
   outputBagRID
 )
 
+/* 
+ * finalizeExecutionRun: finalizes the execution run
+*/
+process finalizeExecutionRun {
+  tag "${repRID}"
+
+  input:
+    path script_uploadExecutionRun_finalizeExecutionRun
+    path credential, stageAs: "credential.json" from deriva_finalizeExecutionRun
+    val executionRunRID from executionRunRID_finalizeExecutionRun
+    val inputBagRID from inputBagRID_finalizeExecutionRun
+    val outputBagRID
+
+  when:
+    upload
+
+  script:
+  """
+  hostname > ${repRID}.finalizeExecutionRun.log
+  ulimit -a >> ${repRID}.finalizeExecutionRun.log
+
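+  # fetch the execution run record and parse out its workflow and reference genome RIDs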
+  executionRun=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/RID=${executionRunRID})
+  workflow=\$(echo \${executionRun} | grep -o '\\"Workflow\\":.*\\"Reference' | grep -oP '(?<=\\"Workflow\\":\\").*(?=\\",\\"Reference)')
+  genome=\$(echo \${executionRun} | grep -o '\\"Reference_Genome\\":.*\\"Input_Bag' | grep -oP '(?<=\\"Reference_Genome\\":\\").*(?=\\",\\"Input_Bag)')
+
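+  # extract the deriva session cookie for the target host from credential.json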
+  cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
+  cookie=\${cookie:11:-1}
+
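+  # update the run in place, marking it as a successful completion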
+  rid=\$(python3 ${script_uploadExecutionRun_finalizeExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Success -d 'Run Successful' -o ${source} -c \${cookie} -u ${executionRunRID})
+  echo LOG: execution run RID marked as successful - \${rid} >> ${repRID}.finalizeExecutionRun.log
+  """
+}
+
+/* 
+ * failPreExecutionRun: fail the execution run prematurely
+*/
+process failPreExecutionRun {
+  tag "${repRID}"
+
+  input:
+    path script_uploadExecutionRun_failPreExecutionRun
+    path credential, stageAs: "credential.json" from deriva_failPreExecutionRun
+    val spike from spikeMeta_failPreExecutionRun
+    val species from speciesMeta_failPreExecutionRun
+    val inputBagRID from inputBagRID_failPreExecutionRun
+    val fastqCountError from fastqCountError_failPreExecutionRun
+    val fastqCountError_details
+    val fastqReadError from fastqReadError_failPreExecutionRun
+    val fastqReadError_details
+    val speciesError from speciesError_failPreExecutionRun
+    val speciesError_details
+
+  when:
+    upload && (fastqCountError == 'true' || fastqReadError == 'true' || speciesError == 'true')
+
+  script:
+  """
+  hostname > ${repRID}.failPreExecutionRun.log
+  ulimit -a >> ${repRID}.failPreExecutionRun.log
+
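+  # concatenate the details of every pre-pipeline error that fired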
+  errorDetails=""
+  if [ "${fastqCountError}" == "true" ]
+  then
+    errorDetails=\$(echo \${errorDetails}${fastqCountError_details}"\\n")
+  fi
+  if [ "${fastqReadError}" == "true" ]
+  then
+    errorDetails=\$(echo \${errorDetails}${fastqReadError_details}"\\n")
+  fi
+  if [ "${speciesError}" == "true" ]
+  then
+    errorDetails=\$(echo \${errorDetails}${speciesError_details}"\\n")
+  fi
+
+  echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.failPreExecutionRun.log
+  workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
+  workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+  workflow=\${workflow:7:-6}
+  echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.failPreExecutionRun.log
+
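+  # derive the reference genome name from the submitted species and spike-in metadata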
+  if [ "${species}" == "Homo sapiens" ]
+  then
+    genomeName=\$(echo GRCh${refHuVersion})
+  elif [ "${species}" == "Mus musculus" ]
+  then
+    genomeName=\$(echo GRCm${refMoVersion})
+  fi
+  if [ "${spike}" == "yes" ]
+  then
+    genomeName=\$(echo \${genomeName}-S)
+  fi
+  echo LOG: searching for genome name - \${genomeName} >> ${repRID}.failPreExecutionRun.log
+  genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
+  genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+  genome=\${genome:7:-6}
+  echo LOG: genome RID extracted - \${genome} >> ${repRID}.failPreExecutionRun.log
+
+  cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
+  cookie=\${cookie:11:-1}
+
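+  # create a new execution run with Error status, or update the one that already exists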
+  exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
+  echo \${exist} >> ${repRID}.failPreExecutionRun.log
+  if [ "\${exist}" == "[]" ]
+  then
+    rid=\$(python3 ${script_uploadExecutionRun_failPreExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u F)
+    echo LOG: execution run RID uploaded - \${rid} >> ${repRID}.failPreExecutionRun.log
+  else
+    rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+    rid=\${rid:7:-6}
+    echo \${rid} >> ${repRID}.failPreExecutionRun.log
+    executionRun_rid=\$(python3 ${script_uploadExecutionRun_failPreExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${errorDetails}" -o ${source} -c \${cookie} -u \${rid})
+    echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.failPreExecutionRun.log
+  fi
+  """
+}
+
+/* 
+ * failExecutionRun: fail the execution run
+*/
+process failExecutionRun {
+  tag "${repRID}"
+
+  input:
+    path script_uploadExecutionRun_failExecutionRun
+    path credential, stageAs: "credential.json" from deriva_failExecutionRun
+    val executionRunRID from executionRunRID_failExecutionRun
+    val inputBagRID from inputBagRID_failExecutionRun
+    val endsMeta from endsMeta_failExecutionRun
+    val endsRaw
+    val strandedMeta from strandedMeta_failExecutionRun
+    val spikeMeta from spikeMeta_failExecutionRun
+    val speciesMeta from speciesMeta_failExecutionRun
+    val endsInfer from endsInfer_failExecutionRun
+    val strandedInfer from strandedInfer_failExecutionRun
+    val spikeInfer from spikeInfer_failExecutionRun
+    val speciesInfer from speciesInfer_failExecutionRun
+    val pipelineError from pipelineError_failExecutionRun
+    val pipelineError_ends
+    val pipelineError_stranded
+    val pipelineError_spike
+    val pipelineError_species
+
+  when:
+    upload && pipelineError == 'true'
+
+  script:
+  """
+  hostname > ${repRID}.failExecutionRun.log
+  ulimit -a >> ${repRID}.failExecutionRun.log
+
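+  # look up the workflow and genome RIDs recorded on the execution run being failed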
+  executionRun=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/RID=${executionRunRID})
+  workflow=\$(echo \${executionRun} | grep -o '\\"Workflow\\":.*\\"Reference' | grep -oP '(?<=\\"Workflow\\":\\").*(?=\\",\\"Reference)')
+  genome=\$(echo \${executionRun} | grep -o '\\"Reference_Genome\\":.*\\"Input_Bag' | grep -oP '(?<=\\"Reference_Genome\\":\\").*(?=\\",\\"Input_Bag)')
+
+  cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
+  cookie=\${cookie:11:-1}
+
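+  # close the run as successful if the error flag was cleared; otherwise report each metadata mismatch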
+  if [ "${pipelineError}" == "false" ]
+  then
+    rid=\$(python3 ${script_uploadExecutionRun_failExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Success -d 'Run Successful' -o ${source} -c \${cookie} -u ${executionRunRID})
+    echo LOG: execution run RID marked as successful - \${rid} >> ${repRID}.failExecutionRun.log
+  else
+    pipelineError_details=\$(echo "**Submitted metadata does not match inferred:**\\n")
+    pipelineError_details=\$(echo \${pipelineError_details}"|Metadata|Submitted value|Inferred value|\\n")
+    pipelineError_details=\$(echo \${pipelineError_details}"|:-:|-:|-:|\\n")
+    if ${pipelineError_ends}
+    then
+      if [ "${endsInfer}" == "se" ]
+      then
+        endInfer="Single End"
+      elif [ "${endsInfer}" == "pe" ]
+      then
+        endInfer="Paired End"
+      else
+        endInfer="unknown"
+      fi
+      pipelineError_details=\$(echo \${pipelineError_details}"|Paired End|${endsRaw}|"\${endInfer}"|\\n")
+    fi
+    if ${pipelineError_stranded}
+    then
+      pipelineError_details=\$(echo \${pipelineError_details}"|Strandedness|${strandedMeta}|${strandedInfer}|\\n")
+    fi
+    if ${pipelineError_spike}
+    then
+      pipelineError_details=\$(echo \${pipelineError_details}"|Used Spike Ins|${spikeMeta}|${spikeInfer}|\\n")
+    fi
+    if ${pipelineError_species}
+    then
+      pipelineError_details=\$(echo \${pipelineError_details}"|Species|${speciesMeta}|${speciesInfer}|\\n")
+    fi
+    pipelineError_details=\${pipelineError_details::-2}
+    rid=\$(python3 ${script_uploadExecutionRun_failExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s Error -d "\${pipelineError_details}" -o ${source} -c \${cookie} -u ${executionRunRID})
+    echo LOG: execution run RID marked as error - \${rid} >> ${repRID}.failExecutionRun.log
+  fi
+  """
+}
+
 
 workflow.onError = {
   subject = "$workflow.manifest.name FAILED: $params.repRID"
diff --git a/workflow/scripts/bdbag_fetch.sh b/workflow/scripts/bdbag_fetch.sh
index 606b88397d5a6cf4feb4aa38d7615e3e3ba48735..c34dc756d0cc5a47382fb9f96267e378c19ae79a 100644
--- a/workflow/scripts/bdbag_fetch.sh
+++ b/workflow/scripts/bdbag_fetch.sh
@@ -1,14 +1,25 @@
 #!/bin/bash
 
-if [ -z "${3}" ]
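+# materialize the input bag, then confirm the transfer with a full validation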
+bdbag --materialize ${1} --debug
+validate=""
+bdbag --validate full ${1} 2> validate.txt
+validate=$(tail -n1 validate.txt | grep -o 'is valid')
+if [ "${validate}" != "is valid" ]
 then
-    bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz ${1}
-    for i in $(find */ -name "*R*.fastq.gz")
+    n=0
+    until [ "${n}" -ge "3" ]
     do
-        path=${2}.$(echo ${i##*/} | grep -o "R[1,2].fastq.gz")
-        cp ${i} ./${path}
+        bdbag --resolve-fetch missing ${1} --debug
+        bdbag --validate full ${1} 2> validate.txt
+        validate=$(tail -n1 validate.txt | grep -o 'is valid')
+        [ "${validate}" == "is valid" ] && break
+        n=$((n+1))
+        sleep 15
     done
-elif [ "${3}" == "TEST" ]
+fi
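+# give up if the bag still fails validation after three retries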
+if [ "${validate}" != "is valid" ]
 then
-    bdbag --resolve-fetch all --fetch-filter filename\$*.txt ${1}
+    exit 1
 fi
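+# copy the fetched fastq files into the working directory under normalized R1/R2 names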
+for i in $(find */ -name "*R*.fastq.gz")
+do
+    path=${2}.$(echo ${i##*/} | grep -o "R[12]\.fastq\.gz")
+    cp ${i} ./${path}
+done
\ No newline at end of file
diff --git a/workflow/scripts/parse_meta.py b/workflow/scripts/parse_meta.py
index 16411df357555991fefdcbd65a6cf0f1f0667017..12cc7c7233b94509e5c3a7307e8ef7985a94a958 100644
--- a/workflow/scripts/parse_meta.py
+++ b/workflow/scripts/parse_meta.py
@@ -62,12 +62,7 @@ def main():
 
     # Get endedness metadata from 'Experiment Settings.csv'
     if (args.parameter == "endsMeta"):
-        if (metaFile.Paired_End.unique() == "Single End"):
-            endsMeta = "se"
-        elif (metaFile.Paired_End.unique() == "Paired End"):
-            endsMeta = "pe"
-        else:
-            endsMeta = "uk"
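+        # report the raw Paired_End value; the pipeline derives the se/pe/uk code downstream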
+        endsMeta = metaFile.Paired_End.unique()[0]
         print(endsMeta)
 
     # Manually get endness count from 'File.csv'
diff --git a/workflow/scripts/split_study.sh b/workflow/scripts/split_study.sh
index 1f82af6132dad6148adf506a34769c0af1fe9992..aeec0fa1d1fc8e7fe41eaa2c332eae55fe2f0c3e 100644
--- a/workflow/scripts/split_study.sh
+++ b/workflow/scripts/split_study.sh
@@ -9,13 +9,13 @@ echo "curl --location --request GET 'https://www.gudmap.org/ermrest/catalog/2/en
 
 # extract replicate RIDs
 module load python/3.6.4-anaconda
-python3 ./workflow/scripts/splitStudy.py -s $1
+python3 ./workflow/scripts/split_study.py -s $1
 
 # run pipeline on replicate RIDs in parallel
 module load nextflow/20.01.0
 module load singularity/3.5.3
-while read repRID; do echo ${repRID}; sleep 15; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow -q run workflow/rna-seq.nf --repRID {}
+while read repRID; do echo ${repRID}; sleep 30; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow -q run workflow/rna-seq.nf --repRID {} --source production --deriva /project/BICF/BICF_Core/shared/gudmap/test_data/auth/credential.json --bdbag /project/BICF/BICF_Core/shared/gudmap/test_data/auth/cookies.txt --dev false --upload true --email gervaise.henry@utsouthwestern.edu -with-report ./output/{}_report.html -with-timeline ./output/{}_timeline.html
 
 # cleanup study RID files
 rm $1_studyRID.json
-rm $1_studyRID.csv
+#rm $1_studyRID.csv
diff --git a/workflow/scripts/upload_execution_run.py b/workflow/scripts/upload_execution_run.py
index 5af8565ab0426bd32dc886188a0347360ff4b42c..2e8ea8de7745a3f048b580486f20e25d8904dd0c 100644
--- a/workflow/scripts/upload_execution_run.py
+++ b/workflow/scripts/upload_execution_run.py
@@ -31,7 +31,7 @@ def main(hostname, catalog_number, credential):
             "Input_Bag": args.inputBagRID,
             "Notes": args.notes,
             "Execution_Status": args.status,
-            "Execution_Status_Detail": args.statusDetail
+            "Execution_Status_Detail": args.statusDetail.replace('\\n','\n')
         }
         entities = run_table.insert([run_data])
         rid = entities[0]["RID"]
@@ -44,7 +44,7 @@ def main(hostname, catalog_number, credential):
             "Input_Bag": args.inputBagRID,
             "Notes": args.notes,
             "Execution_Status": args.status,
-            "Execution_Status_Detail": args.statusDetail
+            "Execution_Status_Detail": args.statusDetail.replace('\\n','\n')
         }
         entities = run_table.update([run_data])
         rid = args.update
diff --git a/workflow/tests/test_parseMetadata.py b/workflow/tests/test_parseMetadata.py
index fa488800e4b6aeb5b1be2685d75b2801667f0855..5a14fcd885b79d944e46de5d936d17fc941def7b 100644
--- a/workflow/tests/test_parseMetadata.py
+++ b/workflow/tests/test_parseMetadata.py
@@ -19,7 +19,7 @@ def readLine(fileName):
     data = False
     file = open(fileName, "r")
     line = file.readline()
-    if line.strip() == "uk,se,unstranded,no,Homo sapiens,75,Experiment_RID,Study_RID,Replicate_RID":
+    if line.strip() == "uk,uk,se,unstranded,no,Homo sapiens,75,Experiment_RID,Study_RID,Replicate_RID":
         data = True
 
     return data