diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index af9500114677d3015545c24945032b2e49454873..141fc56791d1cea908250ece29e2b32dfe6a8355 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,25 +2,136 @@ before_script:
   - module load python/3.6.4-anaconda
   - pip install --user attrs==20.3.0 pytest==6.2.2 pytest-pythonpath==0.7.3 pytest-cov==2.11.1
   - module load singularity/3.5.3
+  - export SINGULARITY_CACHEDIR=${dir}cache/
   - module load nextflow/20.01.0
   - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
   - mkdir -p ~/.deriva
   - mkdir -p ~/.bdbag
 
+after_script:
+  - unset SINGULARITY_CACHEDIR
+
 variables:
   refMoVersion: "38.p6.vM25"
   refHuVersion: "38.p13.v36"
   refERCCVersion: "92"
+  dir: "/project/BICF/BICF_Core/shared/gudmap/singularity_cache/"
 
 stages:
+  - singularity
+  - versions
+  - aggregation
   - badges
   - deploy
   - unit
-  - aggregation
   - reference
   - integration
   - consistency
 
+
+img_cache:
+  stage: singularity
+  script:
+    - mkdir -p ${dir}cache/
+    - cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | xargs -P 3 -I {} bash -c "singularity pull --dir ${dir} 'docker://'{} || true"
+    - wait
+    - echo images cached
+
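The `grep -oP "container = \K.*"` pipeline above lists every `container = '...'` value in nextflow.config; `\K` discards the matched prefix so only the quoted image reference survives, and `tr -d "'"` strips the quotes before the references are deduplicated and pulled three at a time. The same extraction recurs in nearly every job below to rebuild the cached .sif path. A minimal bash sketch of that idiom as a reusable helper (the name `sifPath` is illustrative, not part of this repo):

    # Hypothetical helper: print the cached .sif path for the first container
    # in nextflow.config whose image name matches $1 (e.g. "deriva", "hisat").
    sifPath() {
      local ref img tag
      ref=$(grep -oP "container = \K.*" nextflow.config | tr -d "'" | sort -u | grep "$1" | head -n 1)
      img=$(echo "${ref}" | cut -d"/" -f2 | cut -d":" -f1)   # gudmaprbk/deriva1.4:1.0.0 -> deriva1.4
      tag=$(echo "${ref}" | cut -d"/" -f2 | cut -d":" -f2)   # gudmaprbk/deriva1.4:1.0.0 -> 1.0.0
      echo "${dir}${img}_${tag}.sif"                         # singularity pull names images <name>_<tag>.sif
    }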
+collect:
+  stage: versions
+  script:
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-download-cli --version > version_deriva.txt
+    - singularity run ${dir}${derivaImg}_${derivaVar}.sif bdbag --version > version_bdbag.txt
+    - pythonImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f1)
+    - pythonVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${pythonImg}_${pythonVar}.sif
+    - singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 --version > version_python.txt
+    - fastqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f1)
+    - fastqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${fastqcImg}_${fastqcVar}.sif
+    - singularity run ${dir}${fastqcImg}_${fastqcVar}.sif fastqc --version > version_fastqc.txt
+    - seqwhoImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f1)
+    - seqwhoVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${seqwhoImg}_${seqwhoVar}.sif
+    - singularity run ${dir}${seqwhoImg}_${seqwhoVar}.sif seqwho.py -h | grep -o Version.* > version_seqwho.txt &
+    - trimgaloreImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f1)
+    - trimgaloreVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${trimgaloreImg}_${trimgaloreVar}.sif
+    - singularity run ${dir}${trimgaloreImg}_${trimgaloreVar}.sif trim_galore --version > version_trimgalore.txt
+    - seqtkImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f1)
+    - seqtkVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${seqtkImg}_${seqtkVar}.sif
+    - singularity run ${dir}${seqtkImg}_${seqtkVar}.sif seqtk 2>&1 | grep -o Version.* > version_seqtk.txt &
+    - rseqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f1)
+    - rseqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${rseqcImg}_${rseqcVar}.sif
+    - singularity run ${dir}${rseqcImg}_${rseqcVar}.sif infer_experiment.py --version > version_rseqc.txt
+    - hisatImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f1)
+    - hisatVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${hisatImg}_${hisatVar}.sif
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif hisat2 --version > version_hisat2.txt
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools --version > version_samtools.txt
+    - picardImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f1)
+    - picardVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${picardImg}_${picardVar}.sif
+    - singularity run ${dir}${picardImg}_${picardVar}.sif java -jar /picard/build/libs/picard.jar MarkDuplicates --version 2> version_markdups.txt &
+    - subreadImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f1)
+    - subreadVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${subreadImg}_${subreadVar}.sif
+    - singularity run ${dir}${subreadImg}_${subreadVar}.sif featureCounts -v &> version_featurecounts.txt
+    - singularity run ${dir}${subreadImg}_${subreadVar}.sif R --version > version_r.txt
+    - deeptoolsImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f1)
+    - deeptoolsVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${deeptoolsImg}_${deeptoolsVar}.sif
+    - singularity run ${dir}${deeptoolsImg}_${deeptoolsVar}.sif deeptools --version > version_deeptools.txt
+    - multiqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep multiqc | cut -d"/" -f2 | cut -d":" -f1)
+    - multiqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep multiqc | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${multiqcImg}_${multiqcVar}.sif
+    - singularity run ${dir}${multiqcImg}_${multiqcVar}.sif multiqc --version > version_multiqc.txt
+    - wait # let the backgrounded version commands finish writing before artifact collection
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - version_deriva.txt
+      - version_bdbag.txt
+      - version_python.txt
+      - version_fastqc.txt
+      - version_seqwho.txt
+      - version_trimgalore.txt
+      - version_seqtk.txt
+      - version_rseqc.txt
+      - version_hisat2.txt
+      - version_samtools.txt
+      - version_markdups.txt
+      - version_featurecounts.txt
+      - version_r.txt
+      - version_deeptools.txt
+      - version_multiqc.txt
+    expire_in: 7 days
+
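Each version file is generated by the pinned image itself, so the recorded versions cannot drift from what the pipeline actually runs. A condensed sketch of the same pattern as a loop over a tool-to-command map (bash 4 associative array; illustrative only — the job above deliberately spells each call out):

    declare -A verCmd=(
      [fastqc]="fastqc --version"
      [deeptools]="deeptools --version"
      [multiqc]="multiqc --version"
    )
    for tool in "${!verCmd[@]}"; do
      sif=$(sifPath "${tool}")                    # helper sketched after img_cache above
      # the map value is intentionally unquoted so the command and its args word-split
      singularity run "${sif}" ${verCmd[${tool}]} > "version_${tool}.txt"
    done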
+generateVersions:
+  stage: aggregation
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
+    - schedules
+  script:
+    - python ./workflow/scripts/generate_versions.py -o software_versions
+    - python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - software_references_mqc.yaml
+      - software_versions_mqc.yaml
+    expire_in: 7 days
+
+
 build_badges:
   stage: badges
   only:
@@ -32,8 +143,11 @@ build_badges:
     - chmod +x ./workflow/scripts/get_updated_badge_info.sh
   script:
     - echo "Building badges"
-    - singularity run 'docker://gudmaprbk/gudmap-rbk_base:1.0.0' bash ./workflow/scripts/get_updated_badge_info.sh
-    - singularity run 'docker://gudmaprbk/gudmap-rbk_base:1.0.0' bash ./workflow/scripts/get_updated_rep_count.sh
+    - baseImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep gudmap-rbk_base | cut -d"/" -f2 | cut -d":" -f1)
+    - baseVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep gudmap-rbk_base | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${baseImg}_${baseVar}.sif
+    - singularity run ${dir}${baseImg}_${baseVar}.sif bash ./workflow/scripts/get_updated_badge_info.sh
+    - singularity run ${dir}${baseImg}_${baseVar}.sif bash ./workflow/scripts/get_updated_rep_count.sh
   artifacts:
     paths:
       - badges/
@@ -62,16 +176,12 @@ getBag:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
-  - singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-download-cli --version > version_deriva.txt
-  - singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-download-cli staging.gudmap.org --catalog 2 ./workflow/conf/Replicate_For_Input_Bag.json . rid=Q-Y5F6
-  - pytest -m getBag
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_deriva.txt
-    expire_in: 7 days
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
+    - singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-download-cli staging.gudmap.org --catalog 2 ./workflow/conf/Replicate_For_Input_Bag.json . rid=Q-Y5F6
+    - pytest -m getBag
 
 getData:
   stage: unit
@@ -82,17 +192,13 @@ getData:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' bdbag --version > version_bdbag.txt
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - unzip ./test_data/bag/Q-Y5F6_inputBag_xxxxtest.zip
-  - singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' bash ./workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6
-  - pytest -m getData
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_bdbag.txt
-    expire_in: 7 days
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - unzip ./test_data/bag/Q-Y5F6_inputBag_xxxxtest.zip
+    - singularity run ${dir}${derivaImg}_${derivaVar}.sif bash ./workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6
+    - pytest -m getData
 
 parseMetadata:
   stage: unit
@@ -103,27 +209,23 @@ parseMetadata:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/python3:1.0.0' python3 --version > version_python.txt
-  - rep=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
-  - exp=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
-  - study=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
-  - endsRaw=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
-  - endsMeta="uk"
-  - endsManual="se"
-  - stranded=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
-  - spike=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
-  - species=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
-  - readLength=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p readLength)
-  - echo -e "${endsMeta},${endsRaw},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
-  - pytest -m parseMetadata
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_python.txt
-    expire_in: 7 days
+    - pythonImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f1)
+    - pythonVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${pythonImg}_${pythonVar}.sif
+    - rep=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
+    - exp=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
+    - study=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
+    - endsRaw=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
+    - stranded=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
+    - spike=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
+    - species=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
+    - readLength=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p readLength)
+    - endsMeta="uk"
+    - endsManual="se"
+    - echo -e "${endsMeta},${endsRaw},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
+    - pytest -m parseMetadata
 
-inferMetadata:
+fastqc:
   stage: unit
   only:
     - push
@@ -132,23 +234,13 @@ inferMetadata:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' infer_experiment.py --version > version_rseqc.txt
-  - >
-    align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
-    if [[ ${align} == "" ]]; then exit 1; fi
-  - >
-    singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
-    ended=`singularity run 'gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/infer_meta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
-    if [[ ${ended} == "" ]]; then exit 1; fi
-  - pytest -m inferMetadata
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_rseqc.txt
-    expire_in: 7 days
+    - fastqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f1)
+    - fastqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${fastqcImg}_${fastqcVar}.sif
+    - singularity run ${dir}${fastqcImg}_${fastqcVar}.sif fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
+    - pytest -m fastqc
 
-trimData:
+seqwho:
   stage: unit
   only:
     - push
@@ -157,20 +249,15 @@ trimData:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --version > version_trimgalore.txt
-  - singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
-  - singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
-  - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
-  - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
-  - pytest -m trimData
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_trimgalore.txt
-    expire_in: 7 days
+    - seqwhoImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f1)
+    - seqwhoVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${seqwhoImg}_${seqwhoVar}.sif
+    - wget -O SeqWho.ix https://cloud.biohpc.swmed.edu/index.php/s/eeNWqZz8jqN5zWY/download
+    - mkdir -p SeqWho_call_plots/test_data/fastq/small/
+    - singularity run ${dir}${seqwhoImg}_${seqwhoVar}.sif seqwho.py -f test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -x SeqWho.ix
+    - pytest -m seqwho
 
-downsampleData:
+trimData:
   stage: unit
   only:
     - push
@@ -179,10 +266,16 @@ downsampleData:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/seqtk1.3:1.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
-  - pytest -m downsampleData
+    - trimgaloreImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f1)
+    - trimgaloreVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${trimgaloreImg}_${trimgaloreVar}.sif
+    - singularity run ${dir}${trimgaloreImg}_${trimgaloreVar}.sif trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
+    - singularity run ${dir}${trimgaloreImg}_${trimgaloreVar}.sif trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
+    - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
+    - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
+    - pytest -m trimData
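The `readLengthSE`/`readLengthPE` pipelines compute the median trimmed read length: the first awk prints the length of every sequence line (line 2 of each 4-line FASTQ record), `sort -n` orders the lengths, and the second awk buffers them in `a[]`, printing the middle value for an odd count or the mean of the two middle values for an even count. The median step in isolation, on plain numbers:

    $ printf '35\n76\n50\n' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}'
    50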
 
-alignData:
+downsampleData:
   stage: unit
   only:
     - push
@@ -191,26 +284,13 @@ alignData:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 --version > version_hisat2.txt
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools --version > version_samtools.txt
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
-  - singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
-  - pytest -m alignData
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_hisat2.txt
-      - version_samtools.txt
-    expire_in: 7 days
+    - seqtkImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f1)
+    - seqtkVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${seqtkImg}_${seqtkVar}.sif
+    - singularity run ${dir}${seqtkImg}_${seqtkVar}.sif seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
+    - pytest -m downsampleData
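seqtk sample is seeded (`-s100`), so the 1000-read subsample is reproducible; for paired-end data the same seed must be used on both mates so the sampled reads stay paired. A minimal sketch with hypothetical filenames:

    # Identical seeds keep R1/R2 in sync
    seqtk sample -s100 in.R1.fq.gz 1000 > sub.R1.fq
    seqtk sample -s100 in.R2.fq.gz 1000 > sub.R2.fq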
 
-dedupData:
+inferMetadata:
   stage: unit
   only:
     - push
@@ -219,25 +299,18 @@ dedupData:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools --version > version_samtools.txt
-  - singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates --version 2> version_markdups.txt&
-  - singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
-  - singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
-  - singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
-  - >
-    for i in {"chr8","chr4","chrY"}; do
-      echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
-    done | singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' parallel -j 20 -k
-  - pytest -m dedupData
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_markdups.txt
-      - version_samtools.txt
-    expire_in: 7 days
-
-countData:
+    - rseqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f1)
+    - rseqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${rseqcImg}_${rseqcVar}.sif
+    - >
+      align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
+      if [[ ${align} == "" ]]; then exit 1; fi
+    - >
+      singularity run ${dir}${rseqcImg}_${rseqcVar}.sif infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
+      ended=`singularity run ${dir}${rseqcImg}_${rseqcVar}.sif python3 ./workflow/scripts/infer_meta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
+      if [[ ${ended} == "" ]]; then exit 1; fi
+    - pytest -m inferMetadata
+
+alignData:
   stage: unit
   only:
     - push
@@ -246,24 +319,20 @@ countData:
     - merge_requests
     - schedules
   script:
-  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/geneID.tsv
-  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/Entrez.tsv
-  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/sequence/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
-  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se_countData
-  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
-  - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
-  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -v &> version_featurecounts.txt
-  - singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' R --version > version_r.txt
-  - pytest -m makeFeatureCounts
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_featurecounts.txt
-      - version_r.txt
-    expire_in: 7 days
+    - hisatImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f1)
+    - hisatVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${hisatImg}_${hisatVar}.sif
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
+    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
+    - pytest -m alignData
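The repeated `-F 4 -F 8 -F 256` options exclude reads that are unmapped (0x4), have an unmapped mate (0x8), or are secondary alignments (0x100). Since these are bits of the SAM FLAG field, a single combined mask expresses the same filter:

    # 0x4 | 0x8 | 0x100 = 268: drop unmapped, mate-unmapped, and secondary reads
    samtools view -1 -@ 20 -F 268 -o filtered.bam input.sam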
 
-makeBigWig:
+dedupData:
   stage: unit
   only:
     - push
@@ -272,17 +341,18 @@ makeBigWig:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/deeptools3.5.0:1.0.0' deeptools --version > version_deeptools.txt
-  - singularity run 'docker://gudmaprbk/deeptools3.5.0:1.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
-  - pytest -m makeBigWig
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_deeptools.txt
-    expire_in: 7 days
+    - picardImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f1)
+    - picardVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${picardImg}_${picardVar}.sif
+    - singularity run ${dir}${picardImg}_${picardVar}.sif java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
+    - singularity run ${dir}${picardImg}_${picardVar}.sif samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
+    - singularity run ${dir}${picardImg}_${picardVar}.sif samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
+    - >
+      for i in {"chr8","chr4","chrY"}; do
+        echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
+      done | singularity run ${dir}${picardImg}_${picardVar}.sif parallel -j 20 -k
+    - pytest -m dedupData
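The per-chromosome split works by emitting one command string per chromosome and piping the stream into GNU parallel inside the picard container: `-j 20` runs up to 20 jobs at once and `-k` keeps output in input order. The same shape on a trivial payload (hypothetical commands, outside any container):

    for i in {"chr8","chr4","chrY"}; do
      echo "echo processing ${i};"
    done | parallel -j 20 -k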
 
-fastqc:
+countData:
   stage: unit
   only:
     - push
@@ -291,60 +361,51 @@ fastqc:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/fastqc0.11.9:1.0.0' fastqc --version > version_fastqc.txt
-  - singularity run 'docker://gudmaprbk/fastqc0.11.9:1.0.0' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
-  - pytest -m fastqc
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - version_fastqc.txt
-    expire_in: 7 days
-
+    - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/geneID.tsv
+    - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/Entrez.tsv
+    - subreadImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f1)
+    - subreadVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${subreadImg}_${subreadVar}.sif
+    - singularity run ${dir}${subreadImg}_${subreadVar}.sif featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/sequence/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
+    - singularity run ${dir}${subreadImg}_${subreadVar}.sif Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se_countData
+    - singularity run ${dir}${subreadImg}_${subreadVar}.sif Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
+    - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
+    - pytest -m makeFeatureCounts
 
-dataQC:
+makeBigWig:
   stage: unit
   only:
     - push
     - tags
   except:
     - merge_requests
+    - schedules
   script:
-  - echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
-  - >
-    for i in {"chr8","chr4","chrY"}; do
-      echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"
-    done | singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
-  - pytest -m dataQC
+    - deeptoolsImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f1)
+    - deeptoolsVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${deeptoolsImg}_${deeptoolsVar}.sif
+    - singularity run ${dir}${deeptoolsImg}_${deeptoolsVar}.sif bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
+    - pytest -m makeBigWig
 
-uploadInputBag:
+dataQC:
   stage: unit
   only:
     - push
     - tags
   except:
     - merge_requests
-    - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
-  - echo THIS IS A TEST FILE > test.txt
-  - >
-    md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
-    size=$(wc -c < ./test.txt) &&
-    exist=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=${md5}) &&
-    if [ "${exist}" == "[]" ]; then
-      cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
-      cookie=${cookie:11:-1} &&
-      loc=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/input_bag/TEST/test.txt --parents) &&
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_input_bag.py -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test input bag' -o staging.gudmap.org -c ${cookie}) &&
-      echo ${rid} test input bag created
-    else
-      rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
-      rid=${rid:8:-6} &&
-      echo ${rid} test input bag already exists
-    fi
+    - rseqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f1)
+    - rseqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${rseqcImg}_${rseqcVar}.sif
+    - echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
+    - >
+      for i in {"chr8","chr4","chrY"}; do
+        echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"
+      done | singularity run ${dir}${rseqcImg}_${rseqcVar}.sif parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
+    - pytest -m dataQC
 
-uploadExecutionRun:
+uploadInputBag:
   stage: unit
   only:
     - push
@@ -353,22 +414,28 @@ uploadExecutionRun:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
-  - >
-    exist=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Replicate=17-BTFJ) &&
-    cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
-    cookie=${cookie:11:-1} &&
-    if [ "${exist}" == "[]" ]; then
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u F) &&
-      echo ${rid} test execution run created
-    else
-      rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
-      rid=${rid:7:-6} &&
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u ${rid}) &&
-      echo ${rid} test execution run already exists
-    fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
+    - echo THIS IS A TEST FILE > test.txt
+    - md5=$(md5sum ./test.txt | awk '{ print $1 }')
+    - size=$(wc -c < ./test.txt)
+    - >
+      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=${md5}) &&
+      if [ "${exist}" == "[]" ]; then
+        cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
+        cookie=${cookie:11:-1} &&
+        loc=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/input_bag/TEST/test.txt --parents) &&
+        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_input_bag.py -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test input bag' -o staging.gudmap.org -c ${cookie}) &&
+        echo ${rid} test input bag created
+      else
+        rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
+        rid=${rid:8:-6} &&
+        echo ${rid} test input bag already exists
+      fi
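`cookie=${cookie:11:-1}` is bash substring expansion: after grep isolates `"cookie": "<value>"`, offset 11 skips the `"cookie": "` prefix and the -1 length drops the trailing quote, leaving the bare token passed to the upload script (negative lengths need bash 4.2+). In isolation, with a dummy value:

    $ cookie='"cookie": "webauthn-abc123"'
    $ echo "${cookie:11:-1}"
    webauthn-abc123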
 
-uploadQC:
+uploadExecutionRun:
   stage: unit
   only:
     - push
@@ -377,22 +444,25 @@ uploadQC:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
-  - >
-    exist=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=17-BTFJ) &&
-    cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
-    cookie=${cookie:11:-1} &&
-    if [ "${exist}" != "[]" ]; then
-      rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
-      for rid in ${rids}; do
-        singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/delete_entry.py -r ${rid} -t mRNA_QC -o staging.gudmap.org -c ${cookie}
-      done
-      echo all old mRNA QC RIDs deleted
-    fi
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single End" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
-      echo ${rid} test mRNA QC created
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
+    - >
+      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Replicate=17-BTFJ) &&
+      cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
+      cookie=${cookie:11:-1} &&
+      if [ "${exist}" == "[]" ]; then
+        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u F) &&
+        echo ${rid} test execution run created
+      else
+        rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
+        rid=${rid:7:-6} &&
+        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u ${rid}) &&
+        echo ${rid} test execution run already exists
+      fi
 
-uploadProcessedFile:
+uploadQC:
   stage: unit
   only:
     - push
@@ -401,29 +471,25 @@ uploadProcessedFile:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
-  - echo THIS IS A TEST FILE > 17-BTFJ_test.csv
-  - mkdir -p ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/
-  - mv 17-BTFJ_test.csv ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/17-BTFJ_test.csv
-  - >
-    exist=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=17-BTFJ) &&
-    cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
-    cookie=${cookie:11:-1} &&
-    if [ "${exist}" != "[]" ]; then
-      rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
-      for rid in ${rids}; do
-        singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/delete_entry.py -r ${rid} -t Processed_File -o staging.gudmap.org -c ${cookie}
-      done
-      echo all old processed file RIDs deleted
-    fi
-      singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-upload-cli --catalog 2 --token ${cookie:9} staging.gudmap.org ./deriva
-      echo test processed file uploaded
-  - mkdir test
-  - singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' bdbag test --archiver zip
-  - echo test output bag created
-  - pytest -m outputBag
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
+    - >
+      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=17-BTFJ) &&
+      cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
+      cookie=${cookie:11:-1} &&
+      if [ "${exist}" != "[]" ]; then
+        rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
+        for rid in ${rids}; do
+          singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/delete_entry.py -r ${rid} -t mRNA_QC -o staging.gudmap.org -c ${cookie}
+        done
+        echo all old mRNA QC RIDs deleted
+      fi
+        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single End" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
+        echo ${rid} test mRNA QC created
 
-uploadOutputBag:
+uploadProcessedFile:
   stage: unit
   only:
     - push
@@ -432,27 +498,33 @@ uploadOutputBag:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
-  - echo THIS IS A TEST FILE > test.txt
-  - >
-    md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
-    size=$(wc -c < ./test.txt) &&
-    exist=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_MD5=${md5}) &&
-    if [ "${exist}" == "[]" ]; then
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
+    - echo THIS IS A TEST FILE > 17-BTFJ_test.csv
+    - mkdir -p ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/
+    - mv 17-BTFJ_test.csv ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/17-BTFJ_test.csv
+    - >
+      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=17-BTFJ) &&
       cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
       cookie=${cookie:11:-1} &&
-      loc=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/output_bag/TEST/test.txt --parents) &&
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_output_bag.py -e 17-BVDJ -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test output bag' -o staging.gudmap.org -c ${cookie}) &&
-      echo ${rid} test output bag created
-    else
-      rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
-      rid=${rid:8:-6} &&
-      echo ${rid} test output bag already exists
-    fi
-
+      if [ "${exist}" != "[]" ]; then
+        rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
+        for rid in ${rids}; do
+          singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/delete_entry.py -r ${rid} -t Processed_File -o staging.gudmap.org -c ${cookie}
+        done
+        echo all old processed file RIDs deleted
+      fi
+        singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-upload-cli --catalog 2 --token ${cookie:9} staging.gudmap.org ./deriva
+        echo test processed file uploaded
+    - mkdir test
+    - singularity run ${dir}${derivaImg}_${derivaVar}.sif bdbag test --archiver zip
+    - echo test output bag created
+    - pytest -m outputBag
 
-generateVersions:
-  stage: aggregation
+uploadOutputBag:
+  stage: unit
   only:
     - push
     - tags
@@ -460,16 +532,26 @@ generateVersions:
     - merge_requests
     - schedules
   script:
-  - singularity run 'docker://gudmaprbk/multiqc1.9:1.0.0' multiqc --version > version_multiqc.txt
-  - python ./workflow/scripts/generate_versions.py -o software_versions
-  - python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
-  artifacts:
-    name: "$CI_JOB_NAME"
-    when: always
-    paths:
-      - software_references_mqc.yaml
-      - software_versions_mqc.yaml
-    expire_in: 7 days
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
+    - echo THIS IS A TEST FILE > test.txt
+    - >
+      md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
+      size=$(wc -c < ./test.txt) &&
+      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_MD5=${md5}) &&
+      if [ "${exist}" == "[]" ]; then
+        cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
+        cookie=${cookie:11:-1} &&
+        loc=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/output_bag/TEST/test.txt --parents) &&
+        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_output_bag.py -e 17-BVDJ -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test output bag' -o staging.gudmap.org -c ${cookie}) &&
+        echo ${rid} test output bag created
+      else
+        rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
+        rid=${rid:8:-6} &&
+        echo ${rid} test output bag already exists
+      fi
 
 
 human_BioHPC:
@@ -481,8 +563,8 @@ human_BioHPC:
     - merge_requests
     - schedules
   script:
-  - mkdir -p hu
-  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./hu/
+    - mkdir -p hu
+    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./hu/
 
 mouse_BioHPC:
   stage: reference
@@ -493,8 +575,8 @@ mouse_BioHPC:
     - merge_requests
     - schedules
   script:
-  - mkdir -p mo
-  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./mo/
+    - mkdir -p mo
+    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./mo/
 
 human_dev:
   stage: reference
@@ -505,22 +587,25 @@ human_dev:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - referenceBase=dev.gudmap.org
-  - refName=GRCh
-  - references=$(echo ${referenceBase}/${refName}${refHuVersion})
-  - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
-  - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
-  - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
-  - curl --request GET ${query} > refQuery.json
-  - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
-  - loc=$(dirname ${refURL})
-  - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
-  - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
-  - test=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
-  - test=$(echo ${test} | grep -o ${filename})
-  - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - referenceBase=dev.gudmap.org
+    - refName=GRCh
+    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
+    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
+    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
+    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
+    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
+    - curl --request GET ${query} > refQuery.json
+    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
+    - loc=$(dirname ${refURL})
+    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
+    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
+    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
+    - test=$(echo ${test} | grep -o ${filename})
+    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
 
 mouse_dev:
   stage: reference
@@ -531,22 +616,25 @@ mouse_dev:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - referenceBase=dev.gudmap.org
-  - refName=GRCm
-  - references=$(echo ${referenceBase}/${refName}${refMoVersion})
-  - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
-  - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
-  - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
-  - curl --request GET ${query} > refQuery.json
-  - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
-  - loc=$(dirname ${refURL})
-  - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
-  - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
-  - test=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
-  - test=$(echo ${test} | grep -o ${filename})
-  - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - referenceBase=dev.gudmap.org
+    - refName=GRCm
+    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
+    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
+    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
+    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
+    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
+    - curl --request GET ${query} > refQuery.json
+    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
+    - loc=$(dirname ${refURL})
+    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
+    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
+    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
+    - test=$(echo ${test} | grep -o ${filename})
+    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
 
 human_staging:
   stage: reference
@@ -557,22 +645,25 @@ human_staging:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - referenceBase=staging.gudmap.org
-  - refName=GRCh
-  - references=$(echo ${referenceBase}/${refName}${refHuVersion})
-  - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
-  - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
-  - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
-  - curl --request GET ${query} > refQuery.json
-  - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
-  - loc=$(dirname ${refURL})
-  - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
-  - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
-  - test=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
-  - test=$(echo ${test} | grep -o ${filename})
-  - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - referenceBase=staging.gudmap.org
+    - refName=GRCh
+    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
+    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
+    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
+    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
+    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
+    - curl --request GET ${query} > refQuery.json
+    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
+    - loc=$(dirname ${refURL})
+    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
+    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
+    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
+    - test=$(echo ${test} | grep -o ${filename})
+    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
 
 mouse_staging:
   stage: reference
@@ -583,23 +674,26 @@ mouse_staging:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - referenceBase=staging.gudmap.org
-  - refName=GRCm
-  - refHuVersion=38.p6.vM22
-  - references=$(echo ${referenceBase}/${refName}${refMoVersion})
-  - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
-  - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
-  - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
-  - curl --request GET ${query} > refQuery.json
-  - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
-  - loc=$(dirname ${refURL})
-  - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
-  - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
-  - test=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
-  - test=$(echo ${test} | grep -o ${filename})
-  - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - referenceBase=staging.gudmap.org
+    - refName=GRCm
+    - refMoVersion=38.p6.vM22
+    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
+    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
+    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
+    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
+    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
+    - curl --request GET ${query} > refQuery.json
+    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
+    - loc=$(dirname ${refURL})
+    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
+    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
+    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
+    - test=$(echo ${test} | grep -o ${filename})
+    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
 
 human_prod:
   stage: reference
@@ -610,22 +704,25 @@ human_prod:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - referenceBase=www.gudmap.org
-  - refName=GRCh
-  - references=$(echo ${referenceBase}/${refName}${refHuVersion})
-  - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
-  - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
-  - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
-  - curl --request GET ${query} > refQuery.json
-  - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
-  - loc=$(dirname ${refURL})
-  - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
-  - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
-  - test=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
-  - test=$(echo ${test} | grep -o ${filename})
-  - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - referenceBase=www.gudmap.org
+    - refName=GRCh
+    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
+    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
+    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
+    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
+    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
+    - curl --request GET ${query} > refQuery.json
+    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
+    - loc=$(dirname ${refURL})
+    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
+    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
+    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
+    - test=$(echo ${test} | grep -o ${filename})
+    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
 
 mouse_prod:
   stage: reference
@@ -636,23 +733,26 @@ mouse_prod:
     - merge_requests
     - schedules
   script:
-  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - referenceBase=www.gudmap.org
-  - refName=GRCm
-  - refHuVersion=38.p6.vM22
-  - references=$(echo ${referenceBase}/${refName}${refMoVersion})
-  - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
-  - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
-  - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
-  - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
-  - curl --request GET ${query} > refQuery.json
-  - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
-  - loc=$(dirname ${refURL})
-  - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
-  - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
-  - test=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
-  - test=$(echo ${test} | grep -o ${filename})
-  - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
+    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
+    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
+    - echo ${dir}${derivaImg}_${derivaVar}.sif
+    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
+    - referenceBase=www.gudmap.org
+    - refName=GRCm
+    - refMoVersion=38.p6.vM22
+    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
+    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
+    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
+    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
+    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
+    - curl --request GET ${query} > refQuery.json
+    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
+    - loc=$(dirname ${refURL})
+    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
+    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
+    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
+    - test=$(echo ${test} | grep -o ${filename})
+    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
 
 
 integration_se:
@@ -662,11 +762,11 @@ integration_se:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
-  - pytest -m completionMultiqc --filename SE_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --refSource datahub --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
+    - pytest -m completionMultiqc --filename SE_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -687,11 +787,11 @@ integration_pe:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
-  - pytest -m completionMultiqc --filename PE_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
+    - pytest -m completionMultiqc --filename PE_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -707,76 +807,76 @@ integration_pe:
       - always
 
 
-failAmbiguousSpecies:
+failTrunkation:
   stage: integration
   only: [merge_requests]
   except:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failAmbiguousSpecies_report.html
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
   retry:
     max: 0
     when:
       - always
 
-failTrunkation:
+failMismatchR1R2:
   stage: integration
   only: [merge_requests]
   except:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
   retry:
     max: 0
     when:
       - always
 
-failMismatchR1R2:
+failUnexpectedMeta:
   stage: integration
   only: [merge_requests]
   except:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
   retry:
     max: 0
     when:
       - always
 
-failUnexpectedMeta:
+failFileStructure:
   stage: integration
   only: [merge_requests]
   except:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
   retry:
     max: 0
     when:
       - always
 
-failFileStructure:
+failSeqType:
   stage: integration
   only: [merge_requests]
   except:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-DNDJ --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failSeqType_report.html
   retry:
     max: 0
     when:
@@ -789,12 +889,12 @@ override_inputBag:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_multiqc_data.json \;
-  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./inputBagOverride_multiqc.html \;
-  - pytest -m completionMultiqc --filename inputBagOverride_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_multiqc_data.json \;
+    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./inputBagOverride_multiqc.html \;
+    - pytest -m completionMultiqc --filename inputBagOverride_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -814,12 +914,12 @@ override_fastq:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6  --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_multiqc_data.json \;
-  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./fastqOverride_multiqc.html \;
-  - pytest -m completionMultiqc --filename fastqOverride_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_multiqc_data.json \;
+    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./fastqOverride_multiqc.html \;
+    - pytest -m completionMultiqc --filename fastqOverride_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -839,12 +939,12 @@ override_species:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_multiqc_data.json \;
-  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./speciesOverride_multiqc.html \;
-  - pytest -m completionMultiqc --filename speciesOverride_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_multiqc_data.json \;
+    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./speciesOverride_multiqc.html \;
+    - pytest -m completionMultiqc --filename speciesOverride_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -864,12 +964,12 @@ override_stranded:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_multiqc_data.json \;
-  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./strandedOverride_multiqc.html \;
-  - pytest -m completionMultiqc --filename strandedOverride_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_multiqc_data.json \;
+    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./strandedOverride_multiqc.html \;
+    - pytest -m completionMultiqc --filename strandedOverride_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -889,12 +989,12 @@ override_spike:
     variables:
       - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - hostname
-  - ulimit -a
-  - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_multiqc_data.json \;
-  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./spikeOverride_multiqc.html \;
-  - pytest -m completionMultiqc --filename spikeOverride_multiqc_data.json
+    - hostname
+    - ulimit -a
+    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
+    - find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_multiqc_data.json \;
+    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./spikeOverride_multiqc.html \;
+    - pytest -m completionMultiqc --filename spikeOverride_multiqc_data.json
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
@@ -915,12 +1015,12 @@ consistency:
     variables:
         - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
-  - pytest -m consistencySE
-  - pytest -m consistencyPE
+    - pytest -m consistencySE
+    - pytest -m consistencyPE
   artifacts:
     name: "$CI_JOB_NAME"
     when: always
     paths:
       - SE_multiqc_data.json
       - PE_multiqc_data.json
-    expire_in: 7 days
+    expire_in: 7 days
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3dcbde13917e1eaa3c43a60cfa75a86e478dd3f6..57bc30dfed523968bfa2fd0d078f10f25b5a6be9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,11 @@
 * Strandedness metadata "yes"/"no" changed to boolean "t"/"f" in data-hub, pipeline updated to handle (#70) ("yes"/"no" still acceptable for backwards compatibility)
 * Upload empty mRNA_QC entry if data error (#111)
 * Allow forcing of strandedness and spike (#100)
+* Add SeqWho (species and sequencing type inference)
+* Add SeqWho results to MultiQC report
 * Modify repository structure to allow for use with XPACK-DNANEXUS
+* Add override for endness (`--endsForce`)
+* Add Seqtk to references
 
 **Background**
 * Add memory limit (75%) per thread for samtools sort (#108)
@@ -27,11 +31,18 @@
 * Add new CI py tests for override and integration
 * Fix fastq file and species error status detail bug (#118)
 * Make compatible with XPACK-DNANEXUS
+* Don't download fastqs if a fastq override is present
+* Use the count of the override fastqs when the fastq override is present
+* Change the ambiguous species CI test to a wrong species test
 
 *Known Bugs*
 * Override params (inputBag, fastq, species) aren't checked for integrity
 * Authentication files and tokens must be active (active auth client) for the duration of the pipeline run (until long-lived token utilization included)
 * Check for outputBag in hatrac doesn't check for any uploaded by chaise
+* CI container cache will fail if the cache folder is not owned by the CI runner user
+* CI container cache will not error if a container fails to pull
+* CI (container cache, version collection, and unit tests) will not work correctly if containers referred to in nextflow.config aren't prefixed exactly with: "container = "
+  * also, it is assumed that the containers are on DockerHub and don't have the "docker://" prefix
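+  * an example of a line these CI steps can parse (illustrative; image name taken from this repo): `container = 'gudmaprbk/deriva1.4:1.0.0'`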
 
 <hr>
 
diff --git a/README.md b/README.md
index 04f49d7d7d3ebe739023ccfa30a099136e48b19d..419dc687c27a0cdc7fa77dbc0718c550beaa4ee3 100644
--- a/README.md
+++ b/README.md
@@ -56,12 +56,14 @@ To Run:
   * `--inputBagForce` utilizes a local replicate inputBag instead of downloading from the data-hub (still requires accurate repRID input)
     * eg: `--inputBagForce test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip` (must be the expected bag structure, this example will not work because it is a test bag)
   * `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
-    * eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must me named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
+    * eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around the fastqs, which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order; also consider using `--endsForce` if the endness doesn't match the submitted value)
   * `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses a metadata mismatch or an ambiguous species error
     * eg: `--speciesForce 'Mus musculus'`
+  * `--endsForce` forces the endness to be "se" or "pe", it bypasses a metadata mismatch error
+    * eg: `--endsForce 'pe'`
   * `--strandedForce` forces the strandedness to be "forward", "reverse" or "unstranded", it bypasses a metadata mismatch error
     * eg: `--strandedForce 'unstranded'`
-  * `--spikeForce` forces the spike-in to be "false" or "true", it bypasses a metadata mismatch error
+  * `--spikeForce` forces the spike-in to be "false" or "true", it bypasses a metadata mismatch error
     * eg: `--spikeForce 'true'`
 * Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
   * `--ci` boolean (default = false)
@@ -86,6 +88,15 @@ This pipeline is also capable of being run on AWS and DNAnexus. To do so:
 * Add `-profile` with the name of the customized aws config
 ### DNAnexus (utilizes the [DNAnexus extension package for Nextflow (XPACK-DNANEXUS)](https://github.com/seqeralabs/xpack-dnanexus))
 * Follow the instructions from [XPACK-DNANEXUS](https://github.com/seqeralabs/xpack-dnanexus) about installing and authenticating (a valid license must be available for the extension package from Seqera Labs, as well as a subscription with DNAnexus)
+* The nf-dxapp needs to be built with a custom scm config to allow nextflow to pull the pipeline from the UTSW self-hosted GitLab server (git.biohpc.swmed.edu)
+```
+providers {
+    bicf {
+        server = 'https://git.biohpc.swmed.edu'
+        platform = 'gitlab'
+    }
+}
+```
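+* The `providers` block above is assumed to go in Nextflow's SCM configuration file (typically `~/.nextflow/scm` on the machine building the nf-dxapp)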
 * Follow the instructions from [XPACK-DNANEXUS](https://github.com/seqeralabs/xpack-dnanexus) about launching runs. A template *json* file has been included ([dnanexusExample.json](docs/dnanexusExample.json))
   * `[version]` should be replaced with the pipeline version required (eg: `v2.0.0`)
   * `[credential.json]` should be replaced with the location of the credential file output by authentication with Deriva
@@ -110,7 +121,11 @@ Error reported back to the data-hub are (they aren't thrown on the command line
 |**Number of fastqs detected does not match submitted endness**|Single-end sequenced replicates can only have one fastq, while paired\-end can only have two (see above).|
 |**Number of reads do not match for R1 and R2**|For paired\-end sequenced studies the number of reads in the read\-1 fastq must match that of read\-2. This error is usually indicative of uploading corrupted, truncated, or wrong fastq files.|
 |**There is an error with the structure of the fastq**|The fastqs fail a test of their structure. This error is usually indicative of uploading corrupted, truncated, or wrong fastq files.|
-|**Inference of species returns an ambiguous result**|Species of the replicate is done by aligning a random subset of 1 million reads from the data to both the human and mouse reference genomes. If there isn't a clear difference between the alignment rates (`>=40%` of one species, but `<40%` of the other), then this error is detected.|
+|**Inferred species does not match for R1 and R2**|The species inferred from each read does not match. This error is usually indicative of uploading wrong fastq files.|
+|**Inferred species confidence is low**|The confidence of the species inference call is low. This is usually indicative of very low quality samples.|
+|**Inferred sequencing type is not mRNA-seq**|The sequencing type inferred is not mRNA-seq. This is usually indicative of uploading wrong fastq files.|
+|**Inferred sequencing type does not match for R1 and R2**|The sequencing type inferred from each read does not match. This error is usually indicative of uploading wrong fastq files.|
+|**Inferred species confidence is low**|The confidence of the species inference call is low AND 3 random samplings of the fastqs do not match. This is usually indicative of very low quality samples.|
 |**Submitted metadata does not match inferred**|All required metadata for analysis of the data is internally inferred by the pipeline, if any of those do not match the submitted metadata, this error is detected to notify of a potential error. The mismatched metadata will be listed.|
 
 <hr>
diff --git a/conf/.gitkeep b/conf/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/conf/Execution_Run_For_Output_Bag.json b/conf/Execution_Run_For_Output_Bag.json
deleted file mode 100755
index 5945b1eb8c4c5e3ec862840f232ed7a8e386d770..0000000000000000000000000000000000000000
--- a/conf/Execution_Run_For_Output_Bag.json
+++ /dev/null
@@ -1,64 +0,0 @@
-{
-  "bag": {
-    "bag_name": "Execution_Run_{rid}",
-    "bag_algorithms": [
-      "md5"
-    ],
-    "bag_archiver": "zip",
-    "bag_metadata": {}
-  },
-  "catalog": {
-    "catalog_id": "2",
-    "query_processors": [
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Execution_Run",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/RID,Replicate_RID:=Replicate,Workflow_RID:=Workflow,Reference_Genone_RID:=Reference_Genome,Input_Bag_RID:=Input_Bag,Notes,Execution_Status,Execution_Status_Detail,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Workflow",
-          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Workflow?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Reference_Genome",
-          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Reference_Genome?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Input_Bag",
-          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Input_Bag?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "mRNA_QC",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/(RID)=(RNASeq:mRNA_QC:Execution_Run)/RID,Execution_Run_RID:=Execution_Run,Replicate_RID:=Replicate,Paired_End,Strandedness,Median_Read_Length,Raw_Count,Final_Count,Notes,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "fetch",
-        "processor_params": {
-          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Output_Files",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/(RID)=(RNASeq:Processed_File:Execution_Run)/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
-        }
-      },
-      {
-        "processor": "fetch",
-        "processor_params": {
-          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Input_Bag",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/RNASeq:Input_Bag/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
-        }
-      }
-    ]
-  }
-}
\ No newline at end of file
diff --git a/conf/Replicate_For_Input_Bag.json b/conf/Replicate_For_Input_Bag.json
deleted file mode 100644
index 508a0245051534fae39020792719b04d78947613..0000000000000000000000000000000000000000
--- a/conf/Replicate_For_Input_Bag.json
+++ /dev/null
@@ -1,97 +0,0 @@
-{
-  "bag": {
-    "bag_name": "{rid}_inputBag",
-    "bag_algorithms": [
-      "md5"
-    ],
-    "bag_archiver": "zip"
-  },
-  "catalog": {
-    "query_processors": [
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Study",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Study_RID)=(RNASeq:Study:RID)/Study_RID:=RID,Internal_ID,Title,Summary,Overall_Design,GEO_Series_Accession_ID,GEO_Platform_Accession_ID,Funding,Pubmed_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment Antibodies",
-          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Antibodies:Experiment_RID)?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment Custom Metadata",
-          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Custom_Metadata:Experiment_RID)?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment Settings",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Replicate",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/RID,Study_RID,Experiment_RID,Biological_Replicate_Number,Technical_Replicate_Number,Specimen_RID,Collection_Date,Mapped_Reads,GEO_Sample_Accession_ID,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Specimen",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/S:=(Specimen_RID)=(Gene_Expression:Specimen:RID)/T:=left(Stage_ID)=(Vocabulary:Developmental_Stage:ID)/$S/RID,Title,Species,Stage_ID,Stage_Name:=T:Name,Stage_Detail,Assay_Type,Strain,Wild_Type,Sex,Passage,Phenotype,Cell_Line,Parent_Specimen,Upload_Notes,Preparation,Fixation,Embedding,Internal_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT,GUDMAP2_Accession_ID?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Specimen_Anatomical_Source",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Tissue:Specimen_RID)/RID,Specimen_RID,Tissue,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Specimen_Cell_Types",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Cell_Type:Specimen)/RID,Specimen_RID:=Specimen,Cell_Type,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Single Cell Metrics",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:Single_Cell_Metrics:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Reads_%28Millions%29,Reads%2FCell,Detected_Gene_Count,Genes%2FCell,UMI%2FCell,Estimated_Cell_Count,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "File",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Caption,File_Type,File_Name,URI,File_size,MD5,GEO_Archival_URL,dbGaP_Accession_ID,Processed,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT,Legacy_File_RID,GUDMAP_NGF_OID,GUDMAP_NGS_OID?limit=none"
-        }
-      },
-      {
-        "processor": "fetch",
-        "processor_params": {
-          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=FastQ/File_Name::ciregexp::%5B_.%5DR%5B12%5D%5C.fastq%5C.gz/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
-        }
-      }
-    ]
-  }
-}
diff --git a/conf/aws.config b/conf/aws.config
deleted file mode 100644
index bf5b59c7cf9db00606a5db9f97c706d53f21137f..0000000000000000000000000000000000000000
--- a/conf/aws.config
+++ /dev/null
@@ -1,127 +0,0 @@
-params {
-  refSource = "aws"
-}
-
-workDir = 's3://gudmap-rbk.output/work'
-aws.client.storageEncryption = 'AES256'
-aws {
-  region = 'us-east-2'
-  batch {
-    cliPath = '/home/ec2-user/miniconda/bin/aws'
-  }
-}
-
-process {
-  executor = 'awsbatch'
-  cpus = 1
-  memory = '1 GB'
-
-  withName:trackStart {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:getBag {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:getData {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:parseMetadata {
-    cpus = 15
-    memory = '1 GB'
-  }
-  withName:trimData {
-    cpus = 20
-    memory = '2 GB'
-  }
-  withName:getRefInfer {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:downsampleData {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:alignSampleData {
-    cpus = 50
-    memory = '5 GB'
-  }
-  withName:inferMetadata {
-    cpus = 5
-    memory = '1 GB'
-  }
-  withName:checkMetadata {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:getRef {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:alignData {
-    cpus = 50
-    memory = '10 GB'
-  }
-  withName:dedupData {
-    cpus = 5
-    memory = '20 GB'
-  }
-  withName:countData {
-    cpus = 2
-    memory = '5 GB'
-  }
-  withName:makeBigWig {
-    cpus = 15
-    memory = '5 GB'
-  }
-  withName:fastqc {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:dataQC {
-    cpus = 15
-    memory = '2 GB'
-  }
-  withName:aggrQC {
-    cpus = 2
-    memory = '1 GB'
-  }
-  withName:uploadInputBag {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:uploadExecutionRun {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:uploadQC {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:uploadProcessedFile {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:uploadOutputBag {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:finalizeExecutionRun {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:failPreExecutionRun {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:failExecutionRun {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:uploadQC_fail {
-    cpus = 1
-    memory = '1 GB'
-  }
-}
diff --git a/conf/biohpc.config b/conf/biohpc.config
deleted file mode 100755
index a12f2a704b3c63df9031789c2bb05d11e04d6b3a..0000000000000000000000000000000000000000
--- a/conf/biohpc.config
+++ /dev/null
@@ -1,105 +0,0 @@
-params {
-  refSource = "biohpc"
-}
-
-process {
-  executor = 'slurm'
-  queue = 'super'
-  clusterOptions = '--hold'
-  time = '4h'
-  errorStrategy = 'retry'
-  maxRetries = 1
-
-  withName:trackStart {
-    executor = 'local'
-  }
-  withName:getBag {
-    executor = 'local'
-  }
-  withName:getData {
-    queue = 'super'
-  }
-  withName:parseMetadata {
-    executor = 'local'
-  }
-  withName:trimData {
-    queue = 'super'
-  }
-  withName:getRefInfer {
-    queue = 'super'
-  }
-  withName:downsampleData {
-    executor = 'local'
-  }
-  withName:alignSampleData {
-    queue = '128GB,256GB,256GBv1,384GB'
-  }
-  withName:inferMetadata {
-    queue = 'super'
-  }
-  withName:checkMetadata {
-    executor = 'local'
-  }
-  withName:getRef {
-    queue = 'super'
-  }
-  withName:alignData {
-    queue = '256GB,256GBv1'
-  }
-  withName:dedupData {
-    queue = 'super'
-  }
-  withName:countData {
-    queue = 'super'
-  }
-  withName:makeBigWig {
-    queue = 'super'
-  }
-  withName:fastqc {
-    queue = 'super'
-  }
-  withName:dataQC {
-    queue = 'super'
-  }
-  withName:aggrQC {
-    executor = 'local'
-  }
-  withName:uploadInputBag {
-    executor = 'local'
-  }
-  withName:uploadExecutionRun {
-    executor = 'local'
-  }
-  withName:uploadQC {
-    executor = 'local'
-  }
-  withName:uploadProcessedFile {
-    executor = 'local'
-  }
-  withName:uploadOutputBag {
-    executor = 'local'
-  }
-  withName:finalizeExecutionRun {
-    executor = 'local'
-  }
-  withName:failPreExecutionRun {
-    executor = 'local'
-  }
-  withName:failExecutionRun {
-    executor = 'local'
-  }
-  withName:uploadQC_fail {
-    executor = 'local'
-  }
-}
-
-singularity {
-  enabled = true
-  cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
-}
-
-env {
-  http_proxy = 'http://proxy.swmed.edu:3128'
-  https_proxy = 'http://proxy.swmed.edu:3128'
-  all_proxy = 'http://proxy.swmed.edu:3128'
-}
diff --git a/conf/biohpc_max.config b/conf/biohpc_max.config
deleted file mode 100755
index 0e93ccf6a0be4c15c076ab6eb955a4bb39d96120..0000000000000000000000000000000000000000
--- a/conf/biohpc_max.config
+++ /dev/null
@@ -1,16 +0,0 @@
-process {
-  executor = 'slurm'
-  queue = '256GB,256GBv1,384GB,128GB'
-  clusterOptions = '--hold'
-}
-
-singularity {
-  enabled = true
-  cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
-}
-
-env {
-  http_proxy = 'http://proxy.swmed.edu:3128'
-  https_proxy = 'http://proxy.swmed.edu:3128'
-  all_proxy = 'http://proxy.swmed.edu:3128'
-}
diff --git a/conf/multiqc_config.yaml b/conf/multiqc_config.yaml
deleted file mode 100644
index ed1375aed47a454394029e5057695b0c15babd8c..0000000000000000000000000000000000000000
--- a/conf/multiqc_config.yaml
+++ /dev/null
@@ -1,180 +0,0 @@
-custom_logo: './bicf_logo.png'
-custom_logo_url: 'https/utsouthwestern.edu/labs/bioinformatics/'
-custom_logo_title: 'Bioinformatics Core Facility'
-
-report_header_info:
-  - Contact Email: 'bicf@utsouthwestern.edu'
-  - Application Type: 'RNA-Seq Analytic Pipeline for GUDMAP/RBK'
-  - Department: 'Bioinformatic Core Facility, Department of Bioinformatics, University of Texas Southwestern Medical Center'
-
-title: RNA-Seq Analytic Pipeline for GUDMAP/RBK
-
-report_comment: >
-  This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a>
-
-top_modules:
-  - fastqc:
-      name: 'Raw'
-      info: 'Replicate Raw fastq QC Results'
-  - cutadapt:
-      name: 'Trim'
-      info: 'Replicate Trim Adapter QC Results'
-  - hisat2:
-      name: 'Align'
-      info: 'Replicate Alignment QC Results'
-      path_filters:
-        - '*alignSummary*'
-  - picard:
-      name: 'Dedup'
-      info: 'Replicate Alignement Deduplication QC Results'
-  - rseqc:
-      name: 'Inner Distance'
-      info: 'Replicate Paired End Inner Distance Distribution Results'
-      path_filters:
-        - '*insertSize*'
-  - custom_content
-  - featureCounts:
-      name: 'Count'
-      info: 'Replicate Feature Count QC Results'
-  - hisat2:
-      name: 'Inference: Align'
-      info: 'Inference Alignment (1M downsampled reads) QC Results'
-      path_filters:
-        - '*alignSampleSummary*'
-  - rseqc:
-      name: 'Inference: Stranded'
-      info: '1M Downsampled Reads Strandedness Inference Results'
-      path_filters:
-        - '*infer_experiment*'
-
-report_section_order:
-    run:
-      order: 4000
-    rid:
-      order: 3000
-    meta:
-      order: 2000
-    ref:
-      order: 1000
-    software_versions:
-      order: -1000
-    software_references:
-      order: -2000
-
-skip_generalstats: true
-
-custom_data:
-    run:
-        file_format: 'tsv'
-        section_name: 'Run'
-        description: 'This is the run information'
-        plot_type: 'table'
-        pconfig:
-            id: 'run'
-            scale: false
-            format: '{}'
-        headers:
-            Session:
-                description: ''
-            Session ID:
-                description: 'Nextflow session ID'
-            Pipeline Version:
-                description: 'BICF pipeline version'
-            Input:
-                description: 'Input overrides'
-    rid:
-        file_format: 'tsv'
-        section_name: 'RID'
-        description: 'This is the identifying RIDs'
-        plot_type: 'table'
-        pconfig:
-            id: 'rid'
-            scale: false
-            format: '{}'
-        headers:
-            Replicate:
-                description: ''
-            Replicate RID:
-                description: 'Replicate RID'
-            Experiment RID:
-                description: 'Experiment RID'
-            Study RID:
-                description: 'Study RID'
-    meta:
-        file_format: 'tsv'
-        section_name: 'Metadata'
-        description: 'This is the comparison of infered metadata, submitter provided, and calculated'
-        plot_type: 'table'
-        pconfig:
-            id: 'meta'
-            scale: false
-            format: '{:,.0f}'
-        headers:
-            Source:
-                description: 'Metadata source'
-            Species:
-                description: 'Species'
-            Ends:
-                description: 'Single or paired end sequencing'
-            Stranded:
-                description: 'Stranded (forward/reverse) or unstranded library prep'
-            Spike-in:
-                description: 'ERCC spike in'
-            Raw Reads:
-                description: 'Number of reads of the sequencer'
-            Assigned Reads:
-                description: 'Final reads after fintering'
-            Median Read Length:
-                description: 'Average read length'
-            Median TIN:
-                description: 'Average transcript integrity number'
-
-    ref:
-        file_format: 'tsv'
-        section_name: 'Reference'
-        description: 'This is the reference version information'
-        plot_type: 'table'
-        pconfig:
-            id: 'ref'
-            scale: false
-            format: '{}'
-        headers:
-            Species:
-                description: 'Reference species'
-            Genome Reference Consortium Build:
-                description: 'Reference source build'
-            Genome Reference Consortium Patch:
-                description: 'Reference source patch version'
-            GENCODE Annotation Release:
-                description: 'Annotation release version'
-    tin:
-        file_format: 'tsv'
-        section_name: 'TIN'
-        description: 'This is the distribution of TIN values calculated by the tool RSeQC'
-        plot_type: 'bargraph'
-        pconfig:
-            id: 'tin'
-        headers:
-            chrom
-            1 - 10
-            11 - 20
-            21 - 30
-            31 - 40
-            41 - 50
-            51 - 60
-            61 - 70
-            71 - 80
-            81 - 90
-            91 - 100
-
-sp:
-    run:
-        fn: "run.tsv"
-    rid:
-        fn: 'rid.tsv'
-    meta:
-        fn: 'metadata.tsv'
-    ref:
-        fn: 'reference.tsv'
-    tin:
-        fn: '*_tin.hist.tsv'
diff --git a/conf/ondemand.config b/conf/ondemand.config
deleted file mode 100755
index 131fdbb19e1fedf1bc9e206a03d801f13791b810..0000000000000000000000000000000000000000
--- a/conf/ondemand.config
+++ /dev/null
@@ -1,3 +0,0 @@
-process {
-  queue = 'highpriority-0ef8afb0-c7ad-11ea-b907-06c94a3c6390'
-}
diff --git a/conf/spot.config b/conf/spot.config
deleted file mode 100755
index d9c7a4c8fa34aadd597da0170f8e3e223923011a..0000000000000000000000000000000000000000
--- a/conf/spot.config
+++ /dev/null
@@ -1,3 +0,0 @@
-process {
-  queue = 'default-0ef8afb0-c7ad-11ea-b907-06c94a3c6390'
-}
diff --git a/docs/dag.png b/docs/dag.png
index a19e02c47d0ca333f420061965ffda893ae42c83..b9bcdfe73831dc13544df3a33feb008ac9aed269 100755
Binary files a/docs/dag.png and b/docs/dag.png differ
diff --git a/docs/references.md b/docs/references.md
index 4ea1690ec755b51c923070352d4078634bc5e515..54b83b5ebe5fe38f0f6d4b38fee4279f9af5898c 100644
--- a/docs/references.md
+++ b/docs/references.md
@@ -9,35 +9,41 @@
 3. **BDBag**:  
   * D'Arcy, M., Chard, K., Foster, I., Kesselman, C., Madduri, R., Saint, N., & Wagner, R.. 2019. Big Data Bags: A Scalable Packaging Format for Science. Zenodo. doi:[10.5281/zenodo.3338725](http://doi.org/10.5281/zenodo.3338725).
 
-4. **RSeQC**:
-  * Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:[10.1093/bioinformatics/bts356](https://doi.org/10.1093/bioinformatics/bts356).
-
-5. **trimgalore**:
+4. **trimgalore**:
   * trimgalore [https://github.com/FelixKrueger/TrimGalore](https://github.com/FelixKrueger/TrimGalore)
 
-6. **hisat2**:
+5. **hisat2**:
   * Kim ,D.,Paggi, J.M., Park, C., Bennett, C., Salzberg, S.L. 2019 Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. Aug;37(8):907-915. doi:[10.1038/s41587-019-0201-4](https://doi.org/10.1038/s41587-019-0201-4).
 
-7. **samtools**:
+6. **samtools**:
   * Li H., B. Handsaker, A. Wysoker, T. Fennell, J. Ruan, N. Homer, G. Marth, G. Abecasis, R. Durbin, and 1000 Genome Project Data Processing Subgroup. 2009. The Sequence alignment/map (SAM) format and SAMtools. Bioinformatics 25: 2078-9. doi:[10.1093/bioinformatics/btp352](http://dx.doi.org/10.1093/bioinformatics/btp352)
 
-8. **picard**:
+7. **picard**:
   * “Picard Toolkit.” 2019. Broad Institute, GitHub Repository. [http://broadinstitute.github.io/picard/](http://broadinstitute.github.io/picard/); Broad Institute
 
-9. **featureCounts**:
+8. **featureCounts**:
   * Liao, Y., Smyth, G.K., Shi, W. 2014 featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. Apr 1;30(7):923-30. doi:[10.1093/bioinformatics/btt656](https://doi.org/10.1093/bioinformatics/btt656).
 
-10. **R**:
-  * R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:[http://www.R-project.org/](http://www.R-project.org/).
-
-11. **deeptools**:
+9. **deeptools**:
   * Ramírez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:[10.1093/nar/gkw257](http://dx.doi.org/10.1093/nar/gkw257)
 
+10. **Seqtk**:
+  * Seqtk [https://github.com/lh3/seqtk](https://github.com/lh3/seqtk)
+
+11. **R**:
+  * R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:[http://www.R-project.org/](http://www.R-project.org/).
+
 12. **FastQC**
   * FastQC [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
 
-13. **MultiQC**:
+13. **SeqWho**
+  * SeqWho [https://git.biohpc.swmed.edu/s181649/seqwho](https://git.biohpc.swmed.edu/s181649/seqwho)
+
+14. **RSeQC**:
+  * Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:[10.1093/bioinformatics/bts356](https://doi.org/10.1093/bioinformatics/bts356).
+
+15. **MultiQC**:
   * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
 
-14. **Nextflow**:
-  * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316.
+16. **Nextflow**:
+  * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316.
\ No newline at end of file
diff --git a/docs/software_references_mqc.yaml b/docs/software_references_mqc.yaml
old mode 100755
new mode 100644
index d9d18558b7df3f626ff89cdb01c610228db92a8b..ac14c454cb40ac795d984c5b6c6a5970c56bd007
--- a/docs/software_references_mqc.yaml
+++ b/docs/software_references_mqc.yaml
@@ -25,12 +25,6 @@
                 <ul>
                 <li>D'Arcy, M., Chard, K., Foster, I., Kesselman, C., Madduri, R., Saint, N., &amp; Wagner, R.. 2019. Big Data Bags: A Scalable Packaging Format for Science. Zenodo. doi:<a href="http://doi.org/10.5281/zenodo.3338725">10.5281/zenodo.3338725</a>.</li>
                 </ul>
-                <ol start="4" style="list-style-type: decimal">
-                <li><strong>RSeQC</strong>:</li>
-                </ol>
-                <ul>
-                <li>Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:<a href="https://doi.org/10.1093/bioinformatics/bts356">10.1093/bioinformatics/bts356</a>.</li>
-                </ul>
                 <ol start="5" style="list-style-type: decimal">
                 <li><strong>trimgalore</strong>:</li>
                 </ol>
@@ -61,17 +55,23 @@
                 <ul>
                 <li>Liao, Y., Smyth, G.K., Shi, W. 2014 featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. Apr 1;30(7):923-30. doi:<a href="https://doi.org/10.1093/bioinformatics/btt656">10.1093/bioinformatics/btt656</a>.</li>
                 </ul>
-                <ol start="10" style="list-style-type: decimal">
-                <li><strong>R</strong>:</li>
+                <ol start="11" style="list-style-type: decimal">
+                <li><strong>deeptools</strong>:</li>
                 </ol>
                 <ul>
-                <li>R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:<a href="http://www.R-project.org/" class="uri">http://www.R-project.org/</a>.</li>
+                <li>Ramírez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:<a href="http://dx.doi.org/10.1093/nar/gkw257">10.1093/nar/gkw257</a></li>
                 </ul>
                 <ol start="11" style="list-style-type: decimal">
-                <li><strong>deeptools</strong>:</li>
+                <li><strong>Seqtk</strong>:</li>
                 </ol>
                 <ul>
-                <li>Ramírez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:<a href="http://dx.doi.org/10.1093/nar/gkw257">10.1093/nar/gkw257</a></li>
+                <li>Seqtk <a href="https://github.com/lh3/seqtk" class="uri">https://github.com/lh3/seqtk</a></li>
+                </ul>
+                <ol start="10" style="list-style-type: decimal">
+                <li><strong>R</strong>:</li>
+                </ol>
+                <ul>
+                <li>R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:<a href="http://www.R-project.org/" class="uri">http://www.R-project.org/</a>.</li>
                 </ul>
                 <ol start="12" style="list-style-type: decimal">
                 <li><strong>FastQC</strong></li>
@@ -79,6 +79,18 @@
                 <ul>
                 <li>FastQC <a href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" class="uri">https://www.bioinformatics.babraham.ac.uk/projects/fastqc/</a></li>
                 </ul>
+                <ol start="12" style="list-style-type: decimal">
+                <li><strong>SeqWho</strong></li>
+                </ol>
+                <ul>
+                <li>SeqWho <a href="https://git.biohpc.swmed.edu/s181649/seqwho" class="uri">https://git.biohpc.swmed.edu/s181649/seqwho</a></li>
+                </ul>
+                <ol start="4" style="list-style-type: decimal">
+                <li><strong>RSeQC</strong>:</li>
+                </ol>
+                <ul>
+                <li>Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:<a href="https://doi.org/10.1093/bioinformatics/bts356">10.1093/bioinformatics/bts356</a>.</li>
+                </ul>
                 <ol start="13" style="list-style-type: decimal">
                 <li><strong>MultiQC</strong>:</li>
                 </ol>
diff --git a/docs/software_versions_mqc.yaml b/docs/software_versions_mqc.yaml
old mode 100755
new mode 100644
index 5eb233d1b251787f3ad3cd14b2b7133259383e02..61c00f4b803d5da66cd19b930a1a15f74fa521ea
--- a/docs/software_versions_mqc.yaml
+++ b/docs/software_versions_mqc.yaml
@@ -1,7 +1,7 @@
 
         id: 'software_versions'
         section_name: 'Software Versions'
-        section_href: 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/blob/78-tool_version/docs/RNA-Seq%20Pipeline%20Design%20Process%20Table.pdf'
+        section_href: 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/wikis/Pipeline/Tool-Versions'
         plot_type: 'html'
         description: 'are collected for pipeline version.'
         data: |
@@ -10,15 +10,17 @@
             <dt>Python</dt><dd>v3.8.3</dd>
             <dt>DERIVA</dt><dd>v1.4.3</dd>
             <dt>BDBag</dt><dd>v1.5.6</dd>
-            <dt>RSeQC</dt><dd>v4.0.0</dd>
             <dt>Trim Galore!</dt><dd>v0.6.4_dev</dd>
             <dt>HISAT2</dt><dd>v2.2.1</dd>
             <dt>Samtools</dt><dd>v1.11</dd>
             <dt>picard (MarkDuplicates)</dt><dd>v2.23.9</dd>
             <dt>featureCounts</dt><dd>v2.0.1</dd>
-            <dt>R</dt><dd>v4.0.3</dd>
             <dt>deepTools</dt><dd>v3.5.0</dd>
+            <dt>Seqtk</dt><dd>v1.3-r106</dd>
+            <dt>R</dt><dd>v4.0.3</dd>
             <dt>FastQC</dt><dd>v0.11.9</dd>
+            <dt>SeqWho</dt><dd>vBeta-1.0.0</dd>
+            <dt>RSeQC</dt><dd>v4.0.0</dd>
             <dt>MultiQC</dt><dd>v1.9</dd>
-            <dt>Pipeline Version</dt><dd>v1.0.2</dd>
+            <dt>Pipeline Version</dt><dd>v2.0.0rc01</dd>
             </dl>
diff --git a/nextflow.config b/nextflow.config
index 288fc9d0f788b460ae1eddf8c0f32ecc5d035125..7e046c710b26b487578556e4f77bba3708d48258 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,6 +22,9 @@ profiles {
 }
 
 process {
+  withName:trackStart {
+    container = 'gudmaprbk/gudmap-rbk_base:1.0.0'
+  }
   withName:getBag {
     container = 'gudmaprbk/deriva1.4:1.0.0'
   }
@@ -31,15 +34,27 @@ process {
   withName:parseMetadata {
     container = 'gudmaprbk/python3:1.0.0'
   }
-  withName:trimData {
-    container = 'gudmaprbk/trimgalore0.6.5:1.0.0'
+  withName:getRefERCC {
+    container = 'gudmaprbk/deriva1.4:1.0.0'
   }
-  withName:getRefInfer {
+  withName:getRef {
     container = 'gudmaprbk/deriva1.4:1.0.0'
   }
+  withName:fastqc {
+    container = 'gudmaprbk/fastqc0.11.9:1.0.0'
+  }
+  withName:seqwho {
+    container = 'gudmaprbk/seqwho0.0.1:1.0.0'
+  }
+  withName:trimData {
+    container = 'gudmaprbk/trimgalore0.6.5:1.0.0'
+  }
   withName:downsampleData {
     container = 'gudmaprbk/seqtk1.3:1.0.0'
   }
+  withName:alignSampleDataERCC {
+    container = 'gudmaprbk/hisat2.2.1:1.0.0'
+  }
   withName:alignSampleData {
     container = 'gudmaprbk/hisat2.2.1:1.0.0'
   }
@@ -49,9 +64,6 @@ process {
   withName:checkMetadata {
     container = 'gudmaprbk/gudmap-rbk_base:1.0.0'
   }
-  withName:getRef {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
   withName:alignData {
     container = 'gudmaprbk/hisat2.2.1:1.0.0'
   }
@@ -64,9 +76,6 @@ process {
   withName:makeBigWig {
     container = 'gudmaprbk/deeptools3.5.0:1.0.0'
   }
-  withName:fastqc {
-    container = 'gudmaprbk/fastqc0.11.9:1.0.0'
-  }
   withName:dataQC {
     container = 'gudmaprbk/rseqc4.0.0:1.0.0'
   }
diff --git a/nextflowConf/Execution_Run_For_Output_Bag.json b/nextflowConf/Execution_Run_For_Output_Bag.json
deleted file mode 100755
index 5945b1eb8c4c5e3ec862840f232ed7a8e386d770..0000000000000000000000000000000000000000
--- a/nextflowConf/Execution_Run_For_Output_Bag.json
+++ /dev/null
@@ -1,64 +0,0 @@
-{
-  "bag": {
-    "bag_name": "Execution_Run_{rid}",
-    "bag_algorithms": [
-      "md5"
-    ],
-    "bag_archiver": "zip",
-    "bag_metadata": {}
-  },
-  "catalog": {
-    "catalog_id": "2",
-    "query_processors": [
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Execution_Run",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/RID,Replicate_RID:=Replicate,Workflow_RID:=Workflow,Reference_Genone_RID:=Reference_Genome,Input_Bag_RID:=Input_Bag,Notes,Execution_Status,Execution_Status_Detail,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Workflow",
-          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Workflow?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Reference_Genome",
-          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Reference_Genome?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Input_Bag",
-          "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Input_Bag?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "mRNA_QC",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/(RID)=(RNASeq:mRNA_QC:Execution_Run)/RID,Execution_Run_RID:=Execution_Run,Replicate_RID:=Replicate,Paired_End,Strandedness,Median_Read_Length,Raw_Count,Final_Count,Notes,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "fetch",
-        "processor_params": {
-          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Output_Files",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/(RID)=(RNASeq:Processed_File:Execution_Run)/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
-        }
-      },
-      {
-        "processor": "fetch",
-        "processor_params": {
-          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Input_Bag",
-          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/RNASeq:Input_Bag/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
-        }
-      }
-    ]
-  }
-}
\ No newline at end of file
diff --git a/nextflowConf/Replicate_For_Input_Bag.json b/nextflowConf/Replicate_For_Input_Bag.json
deleted file mode 100644
index 508a0245051534fae39020792719b04d78947613..0000000000000000000000000000000000000000
--- a/nextflowConf/Replicate_For_Input_Bag.json
+++ /dev/null
@@ -1,97 +0,0 @@
-{
-  "bag": {
-    "bag_name": "{rid}_inputBag",
-    "bag_algorithms": [
-      "md5"
-    ],
-    "bag_archiver": "zip"
-  },
-  "catalog": {
-    "query_processors": [
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Study",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Study_RID)=(RNASeq:Study:RID)/Study_RID:=RID,Internal_ID,Title,Summary,Overall_Design,GEO_Series_Accession_ID,GEO_Platform_Accession_ID,Funding,Pubmed_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment Antibodies",
-          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Antibodies:Experiment_RID)?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment Custom Metadata",
-          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Custom_Metadata:Experiment_RID)?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Experiment Settings",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Replicate",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/RID,Study_RID,Experiment_RID,Biological_Replicate_Number,Technical_Replicate_Number,Specimen_RID,Collection_Date,Mapped_Reads,GEO_Sample_Accession_ID,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Specimen",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/S:=(Specimen_RID)=(Gene_Expression:Specimen:RID)/T:=left(Stage_ID)=(Vocabulary:Developmental_Stage:ID)/$S/RID,Title,Species,Stage_ID,Stage_Name:=T:Name,Stage_Detail,Assay_Type,Strain,Wild_Type,Sex,Passage,Phenotype,Cell_Line,Parent_Specimen,Upload_Notes,Preparation,Fixation,Embedding,Internal_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT,GUDMAP2_Accession_ID?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Specimen_Anatomical_Source",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Tissue:Specimen_RID)/RID,Specimen_RID,Tissue,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Specimen_Cell_Types",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Cell_Type:Specimen)/RID,Specimen_RID:=Specimen,Cell_Type,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "Single Cell Metrics",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:Single_Cell_Metrics:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Reads_%28Millions%29,Reads%2FCell,Detected_Gene_Count,Genes%2FCell,UMI%2FCell,Estimated_Cell_Count,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
-        }
-      },
-      {
-        "processor": "csv",
-        "processor_params": {
-          "output_path": "File",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Caption,File_Type,File_Name,URI,File_size,MD5,GEO_Archival_URL,dbGaP_Accession_ID,Processed,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT,Legacy_File_RID,GUDMAP_NGF_OID,GUDMAP_NGS_OID?limit=none"
-        }
-      },
-      {
-        "processor": "fetch",
-        "processor_params": {
-          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=FastQ/File_Name::ciregexp::%5B_.%5DR%5B12%5D%5C.fastq%5C.gz/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
-        }
-      }
-    ]
-  }
-}
diff --git a/nextflowConf/bdbag.json b/nextflowConf/bdbag.json
deleted file mode 100644
index 2c2ab245e7d3470d8bb341136dac278360b4d99f..0000000000000000000000000000000000000000
--- a/nextflowConf/bdbag.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "fetch_config": {
-    "http": {
-      "http_cookies": {
-        "file_names": [
-            "*cookies.txt"
-        ],
-        "scan_for_cookie_files": true,
-        "search_paths": [
-            "."
-        ],
-        "search_paths_filter": "*cookies.txt"
-      }
-    },
-    "https": {
-      "http_cookies": {
-        "file_names": [
-            "*cookies.txt"
-        ],
-        "scan_for_cookie_files": true,
-        "search_paths": [
-            "."
-        ],
-        "search_paths_filter": "*cookies.txt"
-      }
-    }
-  }
-}
diff --git a/nextflowConf/biohpc.config b/nextflowConf/biohpc.config
index a12f2a704b3c63df9031789c2bb05d11e04d6b3a..dff28cb4ae54ee54ad63204ec8bd88e2441eb71b 100755
--- a/nextflowConf/biohpc.config
+++ b/nextflowConf/biohpc.config
@@ -22,15 +22,27 @@ process {
   withName:parseMetadata {
     executor = 'local'
   }
-  withName:trimData {
+  withName:getRefERCC {
     queue = 'super'
   }
-  withName:getRefInfer {
+  withName:getRef {
+    queue = 'super'
+  }
+  withName:fastqc {
+    queue = 'super'
+  }
+  withName:seqwho {
+    executor = 'local'
+  }
+  withName:trimData {
     queue = 'super'
   }
   withName:downsampleData {
     executor = 'local'
   }
+  withName:alignSampleDataERCC {
+    queue = '128GB,256GB,256GBv1,384GB'
+  }
   withName:alignSampleData {
     queue = '128GB,256GB,256GBv1,384GB'
   }
@@ -40,9 +52,6 @@ process {
   withName:checkMetadata {
     executor = 'local'
   }
-  withName:getRef {
-    queue = 'super'
-  }
   withName:alignData {
     queue = '256GB,256GBv1'
   }
@@ -55,9 +64,6 @@ process {
   withName:makeBigWig {
     queue = 'super'
   }
-  withName:fastqc {
-    queue = 'super'
-  }
   withName:dataQC {
     queue = 'super'
   }
diff --git a/nextflowConf/dnanexus.config b/nextflowConf/dnanexus.config
index d82ff2bd07adf66caaa827a6f86d5970f20729b6..58531a418a6cc0c31c80e1f155cfe76007b98e8a 100755
--- a/nextflowConf/dnanexus.config
+++ b/nextflowConf/dnanexus.config
@@ -23,12 +23,27 @@ process {
     cpus = 1
     memory = '1 GB'
   }
-  withName:trimData {
+  withName:getRefERCC {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:getRef {
     machineType = 'mem1_ssd1_v2_x16'
     cpus = 16
     memory = '32 GB'
   }
-  withName:getRefInfer {
+  withName:fastqc {
+    machineType = 'mem1_ssd1_v2_x16'
+    cpus = 16
+    memory = '32 GB'
+  }
+  withName:seqwho {
+    executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
+  }
+  withName:trimData {
     machineType = 'mem1_ssd1_v2_x16'
     cpus = 16
     memory = '32 GB'
@@ -38,6 +53,9 @@ process {
     cpus = 1
     memory = '1 GB'
   }
+  withName:alignSampleDataERCC {
+    // 'queue' is a SLURM directive; the DNAnexus profile uses machineType (mirrored from alignSampleData)
+    machineType = 'mem3_ssd1_v2_x16'
+    cpus = 16
+  }
   withName:alignSampleData {
     machineType = 'mem3_ssd1_v2_x16'
     cpus = 16
@@ -53,11 +71,6 @@ process {
     cpus = 1
     memory = '1 GB'
   }
-  withName:getRef {
-    machineType = 'mem1_ssd1_v2_x16'
-    cpus = 16
-    memory = '32 GB'
-  }
   withName:alignData {
     machineType = 'mem3_ssd1_v2_x32'
     cpus = 32
@@ -78,11 +91,6 @@ process {
     cpus = 16
     memory = '32 GB'
   }
-  withName:fastqc {
-    machineType = 'mem1_ssd1_v2_x16'
-    cpus = 16
-    memory = '32 GB'
-  }
   withName:dataQC {
     machineType = 'mem1_ssd1_v2_x16'
     cpus = 16
@@ -95,6 +103,8 @@ process {
   }
   withName:uploadInputBag {
     executor = 'dnanexus'
+    cpus = 1
+    memory = '1 GB'
   }
   withName:uploadExecutionRun {
     executor = 'dnanexus'
diff --git a/nextflowConf/multiqc_config.yaml b/nextflowConf/multiqc_config.yaml
deleted file mode 100644
index ed1375aed47a454394029e5057695b0c15babd8c..0000000000000000000000000000000000000000
--- a/nextflowConf/multiqc_config.yaml
+++ /dev/null
@@ -1,180 +0,0 @@
-custom_logo: './bicf_logo.png'
-custom_logo_url: 'https/utsouthwestern.edu/labs/bioinformatics/'
-custom_logo_title: 'Bioinformatics Core Facility'
-
-report_header_info:
-  - Contact Email: 'bicf@utsouthwestern.edu'
-  - Application Type: 'RNA-Seq Analytic Pipeline for GUDMAP/RBK'
-  - Department: 'Bioinformatic Core Facility, Department of Bioinformatics, University of Texas Southwestern Medical Center'
-
-title: RNA-Seq Analytic Pipeline for GUDMAP/RBK
-
-report_comment: >
-  This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a>
-
-top_modules:
-  - fastqc:
-      name: 'Raw'
-      info: 'Replicate Raw fastq QC Results'
-  - cutadapt:
-      name: 'Trim'
-      info: 'Replicate Trim Adapter QC Results'
-  - hisat2:
-      name: 'Align'
-      info: 'Replicate Alignment QC Results'
-      path_filters:
-        - '*alignSummary*'
-  - picard:
-      name: 'Dedup'
-      info: 'Replicate Alignement Deduplication QC Results'
-  - rseqc:
-      name: 'Inner Distance'
-      info: 'Replicate Paired End Inner Distance Distribution Results'
-      path_filters:
-        - '*insertSize*'
-  - custom_content
-  - featureCounts:
-      name: 'Count'
-      info: 'Replicate Feature Count QC Results'
-  - hisat2:
-      name: 'Inference: Align'
-      info: 'Inference Alignment (1M downsampled reads) QC Results'
-      path_filters:
-        - '*alignSampleSummary*'
-  - rseqc:
-      name: 'Inference: Stranded'
-      info: '1M Downsampled Reads Strandedness Inference Results'
-      path_filters:
-        - '*infer_experiment*'
-
-report_section_order:
-    run:
-      order: 4000
-    rid:
-      order: 3000
-    meta:
-      order: 2000
-    ref:
-      order: 1000
-    software_versions:
-      order: -1000
-    software_references:
-      order: -2000
-
-skip_generalstats: true
-
-custom_data:
-    run:
-        file_format: 'tsv'
-        section_name: 'Run'
-        description: 'This is the run information'
-        plot_type: 'table'
-        pconfig:
-            id: 'run'
-            scale: false
-            format: '{}'
-        headers:
-            Session:
-                description: ''
-            Session ID:
-                description: 'Nextflow session ID'
-            Pipeline Version:
-                description: 'BICF pipeline version'
-            Input:
-                description: 'Input overrides'
-    rid:
-        file_format: 'tsv'
-        section_name: 'RID'
-        description: 'This is the identifying RIDs'
-        plot_type: 'table'
-        pconfig:
-            id: 'rid'
-            scale: false
-            format: '{}'
-        headers:
-            Replicate:
-                description: ''
-            Replicate RID:
-                description: 'Replicate RID'
-            Experiment RID:
-                description: 'Experiment RID'
-            Study RID:
-                description: 'Study RID'
-    meta:
-        file_format: 'tsv'
-        section_name: 'Metadata'
-        description: 'This is the comparison of infered metadata, submitter provided, and calculated'
-        plot_type: 'table'
-        pconfig:
-            id: 'meta'
-            scale: false
-            format: '{:,.0f}'
-        headers:
-            Source:
-                description: 'Metadata source'
-            Species:
-                description: 'Species'
-            Ends:
-                description: 'Single or paired end sequencing'
-            Stranded:
-                description: 'Stranded (forward/reverse) or unstranded library prep'
-            Spike-in:
-                description: 'ERCC spike in'
-            Raw Reads:
-                description: 'Number of reads of the sequencer'
-            Assigned Reads:
-                description: 'Final reads after fintering'
-            Median Read Length:
-                description: 'Average read length'
-            Median TIN:
-                description: 'Average transcript integrity number'
-
-    ref:
-        file_format: 'tsv'
-        section_name: 'Reference'
-        description: 'This is the reference version information'
-        plot_type: 'table'
-        pconfig:
-            id: 'ref'
-            scale: false
-            format: '{}'
-        headers:
-            Species:
-                description: 'Reference species'
-            Genome Reference Consortium Build:
-                description: 'Reference source build'
-            Genome Reference Consortium Patch:
-                description: 'Reference source patch version'
-            GENCODE Annotation Release:
-                description: 'Annotation release version'
-    tin:
-        file_format: 'tsv'
-        section_name: 'TIN'
-        description: 'This is the distribution of TIN values calculated by the tool RSeQC'
-        plot_type: 'bargraph'
-        pconfig:
-            id: 'tin'
-        headers:
-            chrom
-            1 - 10
-            11 - 20
-            21 - 30
-            31 - 40
-            41 - 50
-            51 - 60
-            61 - 70
-            71 - 80
-            81 - 90
-            91 - 100
-
-sp:
-    run:
-        fn: "run.tsv"
-    rid:
-        fn: 'rid.tsv'
-    meta:
-        fn: 'metadata.tsv'
-    ref:
-        fn: 'reference.tsv'
-    tin:
-        fn: '*_tin.hist.tsv'
diff --git a/rna-seq.nf b/rna-seq.nf
index 30fb31a9f73071647582ece33e9c6c4ef945730b..311b6c34423e300c013fb1fd5a8407b3b4f708dd 100644
--- a/rna-seq.nf
+++ b/rna-seq.nf
@@ -22,11 +22,11 @@ params.upload = false
 params.email = ""
 params.track = false
 
-
 // Define override input variable
 params.refSource = "biohpc"
 params.inputBagForce = ""
 params.fastqsForce = ""
+params.endsForce = ""
 params.speciesForce = ""
 params.strandedForce = ""
 params.spikeForce = ""
@@ -35,14 +35,13 @@ params.spikeForce = ""
 params.ci = false
 params.dev = true
 
-
 // Parse input variables
 deriva = Channel
   .fromPath(params.deriva)
   .ifEmpty { exit 1, "deriva credential file not found: ${params.deriva}" }
 deriva.into {
   deriva_getBag
-  deriva_getRefInfer
+  deriva_getRefERCC
   deriva_getRef
   deriva_uploadInputBag
   deriva_uploadExecutionRun
@@ -66,6 +65,7 @@ logsDir = "${outDir}/Logs"
 upload = params.upload
 inputBagForce = params.inputBagForce
 fastqsForce = params.fastqsForce
+endsForce = params.endsForce
 speciesForce = params.speciesForce
 strandedForce = params.strandedForce
 spikeForce = params.spikeForce
@@ -87,7 +87,6 @@ if (params.refSource == "biohpc") {
 } else if (params.refSource == "datahub") {
   referenceBase = "www.gudmap.org"
 }
-referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"])
 multiqcConfig = Channel.fromPath("${baseDir}/workflow/conf/multiqc_config.yaml")
 bicfLogo = Channel.fromPath("${baseDir}/docs/bicf_logo.png")
 softwareReferences = Channel.fromPath("${baseDir}/docs/software_references_mqc.yaml")
@@ -118,7 +117,6 @@ script_deleteEntry_uploadProcessedFile = Channel.fromPath("${baseDir}/workflow/s
  * trackStart: track start of pipeline
  */
 process trackStart {
-  container 'gudmaprbk/gudmap-rbk_base:1.0.0'
   script:
     """
     hostname
@@ -220,7 +218,7 @@ process getBag {
     """
 }
 
-// Set inputBag to downloaded or forced input
+// Set inputBag to downloaded or forced input and replicate it for multiple process inputs
 if (inputBagForce != "") {
   inputBag = Channel
     .fromPath(inputBagForce)
@@ -234,7 +232,7 @@ inputBag.into {
 }
 
 /*
- * getData: fetch replicate files from consortium with downloaded bdbag.zip
+ * getData: fetch replicate files from consortium with downloaded input bag
  */
 process getData {
   tag "${repRID}"
@@ -267,9 +265,15 @@ process getData {
     echo -e "LOG: unzipped" >> ${repRID}.getData.log
 
     # bag fetch fastq's only and rename by repRID
-    echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
-    fastqCount=\$(sh ${script_bdbagFetch} \${replicate::-13} ${repRID})
-    echo -e "LOG: fetched" >> ${repRID}.getData.log
+    if [ "${params.fastqsForce}" == "" ]
+    then
+      echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
+      fastqCount=\$(sh ${script_bdbagFetch} \${replicate::-13} ${repRID})
+      echo -e "LOG: fetched" >> ${repRID}.getData.log
+    else
+      echo -e "LOG: fastq override detected, not fetching fastqs" >> ${repRID}.getData.log
+      fastqCount="0"
+    fi
 
     if [ "\${fastqCount}" == "0" ]
     then
@@ -280,9 +284,10 @@ process getData {
 }
 
 // Split fastq count into channel
+fastqCountTemp = Channel.create()
 fastqCount = Channel.create()
 fastqCount_fl.splitCsv(sep: ",", header: false).separate(
-  fastqCount
+  fastqCountTemp
 )
 
 // Set raw fastq to downloaded or forced input and replicate them for multiple process inputs
@@ -291,19 +296,31 @@ if (fastqsForce != "") {
     .fromPath(fastqsForce)
     .ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" }
     .collect().into {
+      fastqs_seqwho
+      fastqs_trimData
       fastqs_parseMetadata
       fastqs_fastqc
     }
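+  // when fastqs are forced, count the forced files directly instead of relying on the bag-fetch count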
+  Channel
+    .fromPath(fastqsForce)
+    .count().set {
+    fastqCount
+    }
 } else {
   fastqs.collect().into {
+    fastqs_seqwho
+    fastqs_trimData
     fastqs_parseMetadata
     fastqs_fastqc
   }
+  fastqCountTemp.set {
+    fastqCount
+  }
 }
 
 /*
  * parseMetadata: parses metadata to extract experiment parameters
-*/
+ */
 process parseMetadata {
   tag "${repRID}"
 
@@ -458,7 +475,7 @@ process parseMetadata {
     """
 }
 
-// Split metadata into separate channels
+// Split metadata into separate channels and replicate them for multiple process inputs
 endsMeta = Channel.create()
 endsRaw = Channel.create()
 endsManual = Channel.create()
@@ -479,16 +496,16 @@ metadata_fl.splitCsv(sep: ",", header: false).separate(
   expRID,
   studyRID
 )
-
-// Replicate metadata for multiple process inputs
 endsMeta.into {
   endsMeta_checkMetadata
   endsMeta_aggrQC
   endsMeta_failExecutionRun
 }
 endsManual.into {
+  endsManual_seqwho
   endsManual_trimData
   endsManual_downsampleData
+  endsManual_alignSampleDataERCC
   endsManual_alignSampleData
   endsManual_aggrQC
 }
@@ -504,6 +521,7 @@ spikeMeta.into {
   spikeMeta_failExecutionRun
 }
 speciesMeta.into {
+  speciesMeta_seqwho
   speciesMeta_checkMetadata
   speciesMeta_aggrQC
   speciesMeta_failPreExecutionRun
@@ -520,7 +538,7 @@ expRID.into {
   expRID_uploadProcessedFile
 }
 
-// Split fastq count error into separate channel
+// Split fastq count and read errors into separate channels and replicate them for multiple process inputs
 fastqCountError = Channel.create()
 fastqCountError_details = Channel.create()
 fastqReadError = Channel.create()
@@ -531,72 +549,73 @@ fastqError_fl.splitCsv(sep: ",", header: false).separate(
   fastqReadError,
   fastqReadError_details
 )
-
-//  Replicate errors for multiple process inputs
 fastqCountError.into {
   fastqCountError_fastqc
+  fastqCountError_seqwho
+  fastqCountError_getRefERCC
+  fastqCountError_getRef
   fastqCountError_trimData
-  fastqCountError_getRefInfer
   fastqCountError_downsampleData
+  fastqCountError_alignSampleDataERCC
   fastqCountError_alignSampleData
   fastqCountError_inferMetadata
   fastqCountError_checkMetadata
-  fastqCountError_uploadExecutionRun
-  fastqCountError_getRef
   fastqCountError_alignData
   fastqCountError_dedupData
   fastqCountError_makeBigWig
   fastqCountError_countData
   fastqCountError_dataQC
   fastqCountError_aggrQC
+  fastqCountError_uploadExecutionRun
   fastqCountError_uploadQC
-  fastqCountError_uploadQC_fail
   fastqCountError_uploadProcessedFile
   fastqCountError_uploadOutputBag
-  fastqCountError_failPreExecutionRun_fastq
+  fastqCountError_finalizeExecutionRun
+  fastqCountError_uploadQC_fail
 }
 fastqReadError.into {
   fastqReadError_fastqc
+  fastqReadError_seqwho
+  fastqReadError_getRefERCC
+  fastqReadError_getRef
   fastqReadError_trimData
-  fastqReadError_getRefInfer
   fastqReadError_downsampleData
+  fastqReadError_alignSampleDataERCC
   fastqReadError_alignSampleData
   fastqReadError_inferMetadata
   fastqReadError_checkMetadata
-  fastqReadError_uploadExecutionRun
-  fastqReadError_getRef
   fastqReadError_alignData
   fastqReadError_dedupData
   fastqReadError_makeBigWig
   fastqReadError_countData
   fastqReadError_dataQC
   fastqReadError_aggrQC
+  fastqReadError_uploadExecutionRun
   fastqReadError_uploadQC
-  fastqReadError_uploadQC_fail
   fastqReadError_uploadProcessedFile
   fastqReadError_uploadOutputBag
-  fastqReadError_failPreExecutionRun_fastq
+  fastqReadError_finalizeExecutionRun
+  fastqReadError_uploadQC_fail
 }
 
 /*
- *fastqc: run fastqc on untrimmed fastq's
-*/
+ * fastqc: run fastqc on untrimmed fastq's
+ */
 process fastqc {
   tag "${repRID}"
 
   input:
-    path (fastq) from fastqs_fastqc.collect()
-    val fastqCountError_fastqc
-    val fastqReadError_fastqc
+    path (fastq) from fastqs_fastqc
+    val fastqCountError from fastqCountError_fastqc
+    val fastqReadError from fastqReadError_fastqc
 
   output:
-    path ("*.R{1,2}.fastq.gz", includeInputs:true) into fastqs_trimData
     path ("*_fastqc.zip") into fastqc
     path ("rawReads.csv") into rawReadsInfer_fl
     path "fastqFileError.csv" into fastqFileError_fl
 
   when:
-    fastqCountError_fastqc == 'false' && fastqReadError_fastqc == 'false'
+    fastqCountError == "false" && fastqReadError == "false"
 
   script:
     """
@@ -627,178 +646,429 @@ process fastqc {
     """
 }
 
-// Extract number of raw reads metadata into channel
+// Extract number of raw reads metadata into a channel and replicate it for multiple process inputs
 rawReadsInfer = Channel.create()
 rawReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
   rawReadsInfer
 )
-
-// Replicate inferred raw reads for multiple process inputs
 rawReadsInfer.into {
   rawReadsInfer_aggrQC
   rawReadsInfer_uploadQC
 }
 
-// Split fastq count error into separate channel
+// Split fastq file error into separate channels and replicate them for multiple process inputs
 fastqFileError = Channel.create()
 fastqFileError_details = Channel.create()
 fastqFileError_fl.splitCsv(sep: ",", header: false).separate(
   fastqFileError,
   fastqFileError_details
 )
-
-//  Replicate errors for multiple process inputs
 fastqFileError.into {
-  fastqFileError_fastqc
   fastqFileError_trimData
-  fastqFileError_getRefInfer
+  fastqFileError_getRef
   fastqFileError_downsampleData
+  fastqFileError_alignSampleDataERCC
   fastqFileError_alignSampleData
   fastqFileError_inferMetadata
   fastqFileError_checkMetadata
-  fastqFileError_uploadExecutionRun
-  fastqFileError_getRef
   fastqFileError_alignData
   fastqFileError_dedupData
   fastqFileError_makeBigWig
   fastqFileError_countData
   fastqFileError_dataQC
   fastqFileError_aggrQC
+  fastqFileError_uploadExecutionRun
   fastqFileError_uploadQC
-  fastqFileError_uploadQC_fail
   fastqFileError_uploadProcessedFile
   fastqFileError_uploadOutputBag
-  fastqFileError_failPreExecutionRun_fastqFile
+  fastqFileError_finalizeExecutionRun
+  fastqFileError_uploadQC_fail
 }
 
 /*
- * trimData: trims any adapter or non-host sequences from the data
-*/
-process trimData {
+ * seqwho: run seqwho to infer species and seq type
+ */
+process seqwho {
   tag "${repRID}"
 
   input:
-    path (fastq) from fastqs_trimData
-    val ends from endsManual_trimData
-    val fastqCountError_trimData
-    val fastqReadError_trimData
-    val fastqFileError_trimData
+    path (fastq) from fastqs_seqwho
+    val ends from endsManual_seqwho
+    val speciesMeta from speciesMeta_seqwho
+    val fastqCountError from fastqCountError_seqwho
+    val fastqReadError from fastqReadError_seqwho
 
   output:
-    path ("*.fq.gz") into fastqsTrim
-    path ("*_trimming_report.txt") into trimQC
-    path ("readLength.csv") into readLengthInfer_fl
+    path "seqwhoInfer.tsv" into seqwhoInfer
+    path "inferSpecies.csv" into inferSpecies_fl
+    path "inferError.csv" into inferError_fl
 
   when:
-    fastqCountError_trimData == "false"
-    fastqReadError_trimData == "false"
-    fastqFileError_trimData == "false"
+    fastqCountError == "false" && fastqReadError == "false"
 
   script:
     """
-    hostname > ${repRID}.trimData.log
-    ulimit -a >> ${repRID}.trimData.log
+    hostname > ${repRID}.seqwho.log
+    ulimit -a >> ${repRID}.seqwho.log
 
-    # trim fastq's using trim_galore and extract median read length
-    echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
-    if [ "${ends}" == "se" ]
+    # get seqwho index
+    wget -O SeqWho.ix https://cloud.biohpc.swmed.edu/index.php/s/eeNWqZz8jqN5zWY/download
+    echo -e "LOG: seqwho index downloaded" >> ${repRID}.seqwho.log
+    
+    # run seqwho
+    seqwho.py -f *.fastq.gz -x SeqWho.ix
+    echo -e "LOG: seqwho ran" >> ${repRID}.seqwho.log
+
+    # parse inference from R1
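+    # SeqWho_call.tsv layout as parsed here: tab-delimited, "key: value" fields with f16 = confidence, f17 = species call, f18 = seq type call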
+    speciesR1=\$(cat SeqWho_call.tsv | grep ${fastq[0]} | cut -f17 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+    seqtypeR1=\$(cat SeqWho_call.tsv | grep ${fastq[0]} | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+    confidenceR1=\$(cat SeqWho_call.tsv | grep ${fastq[0]} | cut -f16 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+    if [ "\${confidenceR1}" == "low" ]
+    then
+      speciesConfidenceR1=\$(cat SeqWho_call.tsv | grep ${fastq[0]} | cut -f16 -d\$'\t' | cut -f3 -d":" | tr -d " ")
+      seqtypeConfidenceR1=\$(cat SeqWho_call.tsv | grep ${fastq[0]} | cut -f16 -d\$'\t' | cut -f4 -d":" | tr -d " ")
+    else
+      speciesConfidenceR1="1"
+      seqtypeConfidenceR1="1"
+    fi
+    echo -e "LOG: R1 inference parsed" >> ${repRID}.seqwho.log
+
+    # parse inference from R2
+    if [ "${ends}" == "pe" ]
     then
-      trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
-      readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
-    elif [ "${ends}" == "pe" ]
+      speciesR2=\$(cat SeqWho_call.tsv | grep ${fastq[1]} | cut -f17 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+      seqtypeR2=\$(cat SeqWho_call.tsv | grep ${fastq[1]} | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+      confidenceR2=\$(cat SeqWho_call.tsv | grep ${fastq[1]} | cut -f16 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+      if [ "\${confidenceR2}" == "low" ]
+      then
+        speciesConfidenceR2=\$(cat SeqWho_call.tsv | grep ${fastq[1]} | cut -f16 -d\$'\t' | cut -f3 -d":" | tr -d " ")
+        seqtypeConfidenceR2=\$(cat SeqWho_call.tsv | grep ${fastq[1]} | cut -f16 -d\$'\t' | cut -f4 -d":" | tr -d " ")
+      else
+        speciesConfidenceR2="1"
+        seqtypeConfidenceR2="1"
+      fi
+      echo -e "LOG: R2 inference parsed" >> ${repRID}.seqwho.log
+    else
+      speciesR2=\${speciesR1}
+      seqtypeR2=\${seqtypeR1}
+      confidenceR2=\${confidenceR1}
+      speciesConfidenceR2="1"
+      seqtypeConfidenceR2="1"
+    fi
+    cp SeqWho_call.tsv SeqWho_call_full.tsv
+
+    speciesErrorSeqwho=false
+    speciesErrorSeqwho_details=""
+    seqtypeError=false
+    seqtypeError_details=""
+
+    # convert numeric confidence to string
+    if [ \${speciesConfidenceR1} == "1" ]
     then
-      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
-      readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
+      speciesConfidenceR1="high"
+    else
+      speciesConfidenceR1="low"
     fi
-    echo -e "LOG: trimmed" >> ${repRID}.trimData.log
-    echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log
+    if [ \${speciesConfidenceR2} == "1" ]
+    then
+      speciesConfidenceR2="high"
+    else
+      speciesConfidenceR2="low"
+    fi
+    if [ \${seqtypeConfidenceR1} == "1" ]
+    then
+      seqtypeConfidenceR1="high"
+    else
+      seqtypeConfidenceR1="low"
+    fi
+    if [ \${seqtypeConfidenceR2} == "1" ]
+    then
+      seqtypeConfidenceR2="high"
+    else
+      seqtypeConfidenceR2="low"
+    fi
+    echo -e "LOG: confidence converted to string" >> ${repRID}.seqwho.log
 
-    # save read length file
-    echo "\${readLength}" > readLength.csv
+    # set species
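+    # SeqWho reports common names; map them to the binomial species names used in the submitted metadata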
+    if [ "\${speciesR1}" == "\${speciesR2}" ]
+    then
+      speciesInfer=\${speciesR1}
+      if [ "\${speciesInfer}" == "human" ]
+      then
+        speciesInfer="Homo sapiens"
+      elif [ "\${speciesInfer}" == "mouse" ]
+      then
+        speciesInfer="Mus musculus"
+      fi
+      echo -e "LOG: concordant species inference: \${speciesInfer}" >> ${repRID}.seqwho.log
+    else
+      speciesErrorSeqwho=true
+      speciesErrorSeqwho_details="**Infered species does not match for R1 and R2:** Infered R1 = \${speciesR1} and infered R2 = \${speciesR2}"
+      echo -e "LOG: inference error: \${speciesErrorSeqwho_details}" >> ${repRID}.seqwho.log
+    fi
+
+    # detect species confidence errors
+    if [ "\${speciesConfidenceR1}" == "high" ] && [ "\${speciesConfidenceR2}" == "high" ]
+    then
+      echo -e "LOG: high confidence species inference detected" >> ${repRID}.seqwho.log
+    else
+      speciesErrorSeqwho=true
+      speciesErrorSeqwho_details=\$(echo "**Infered species confidence is low:**\\n")
+      speciesErrorSeqwho_details=\$(echo \${speciesErrorSeqwho_details}"|fastq|Infered species confidence|\\n")
+      speciesErrorSeqwho_details=\$(echo \${speciesErrorSeqwho_details}"|:--|:--:|\\n")
+      speciesErrorSeqwho_details=\$(echo \${speciesErrorSeqwho_details}"|Read 1|\${speciesConfidenceR1}|\\n")
+      if [ "${ends}" == "pe" ]
+      then
+        speciesErrorSeqwho_details=\$(echo \${speciesErrorSeqwho_details}"|Read 2|\${speciesConfidenceR2}|\\n")
+      fi
+      echo -e "LOG: inference error: \${speciesErrorSeqwho_details}" >> ${repRID}.seqwho.log
+    fi
+
+    # detect seq type errors and set type
+    if [ "\${seqtypeConfidenceR1}" == "high" ] && [ "\${seqtypeConfidenceR2}" == "high" ]
+    then
+      echo -e "LOG: high confidence seq type inference detected" >> ${repRID}.seqwho.log
+      # set seq type
+      if [ "\${seqtypeR1}" == "\${seqtypeR2}" ]
+      then
+        if [ "\${seqtypeR1}" == "rnaseq" ]
+        then
+          seqtypeInfer="rnaseq"
+          echo -e "LOG: concordant rnaseq seq type inference detected" >> ${repRID}.seqwho.log
+        else
+          seqtypeError=true
+          seqtypeError_details="**Infered sequencing type is not mRNA-seq:** Infered = \${seqtypeR1}"
+          echo -e "LOG: inference error: \${seqtypeError_details}" >> ${repRID}.seqwho.log
+        fi
+      else
+        seqtypeError=true
+        seqtypeError_details="**Infered sequencing type does not match for R1 and R2:** Infered R1 = \${seqtypeR1} and infered R2 = \${seqtypeR2}"
+        echo -e "LOG: inference error: \${seqtypeError_details}" >> ${repRID}.seqwho.log
+      fi
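+      # high confidence calls need no re-check; "-" fills the consensus column of the report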
+      consensus="-"
+    else
+      echo -e "LOG: low confidence seq type inference detected" >> ${repRID}.seqwho.log
+      seqtk sample -s100 ${fastq[0]} 1000000 1> sampled.1.seed100.fastq &
+      seqtk sample -s200 ${fastq[0]} 1000000 1> sampled.1.seed200.fastq &
+      seqtk sample -s300 ${fastq[0]} 1000000 1> sampled.1.seed300.fastq &
+      wait
+      gzip sampled.1.seed100.fastq &
+      gzip sampled.1.seed200.fastq &
+      gzip sampled.1.seed300.fastq &
+      wait
+      seqwho.py -f sampled.1.seed*.fastq.gz -x SeqWho.ix
+      seqtypeR1_1=\$(cat SeqWho_call.tsv | grep sampled.1.seed100.fastq.gz | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+      seqtypeR1_2=\$(cat SeqWho_call.tsv | grep sampled.1.seed200.fastq.gz | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+      seqtypeR1_3=\$(cat SeqWho_call.tsv | grep sampled.1.seed300.fastq.gz | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+      cp SeqWho_call.tsv SeqWho_call_sampledR1.tsv
+      if [ "\${seqtypeR1_1}" == "\${seqtypeR1}" ] && [ "\${seqtypeR1_2}" == "\${seqtypeR1}" ] && [ "\${seqtypeR1_3}" == "\${seqtypeR1}" ]
+      then
+        consensus=true
+      else
+        consensus=false
+      fi
+      if [ "${ends}" == "pe" ]
+      then
+        seqtk sample -s100 ${fastq[1]} 1000000 1> sampled.2.seed100.fastq &
+        seqtk sample -s200 ${fastq[1]} 1000000 1> sampled.2.seed200.fastq &
+        seqtk sample -s300 ${fastq[1]} 1000000 1> sampled.2.seed300.fastq &
+        wait
+        gzip sampled.2.seed100.fastq &
+        gzip sampled.2.seed200.fastq &
+        gzip sampled.2.seed300.fastq &
+        wait
+        seqwho.py -f sampled.2.seed*.fastq.gz -x SeqWho.ix
+        seqtypeR2_1=\$(cat SeqWho_call.tsv | grep sampled.2.seed100.fastq.gz | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+        seqtypeR2_2=\$(cat SeqWho_call.tsv | grep sampled.2.seed200.fastq.gz | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+        seqtypeR2_3=\$(cat SeqWho_call.tsv | grep sampled.2.seed300.fastq.gz | cut -f18 -d\$'\t' | cut -f2 -d":" | tr -d " ")
+        cp SeqWho_call.tsv SeqWho_call_sampledR2.tsv
+        if [ "\${seqtypeR2_1}" == "\${seqtypeR1}" ] && [ "\${seqtypeR2_2}" == "\${seqtypeR1}" ] && [ "\${seqtypeR2_3}" == "\${seqtypeR1}" ]
+        then
+          consensus=\${consensus}
+        else
+          consensus=false
+        fi
+      fi
+      if [ \${consensus} == false ]
+      then
+        seqtypeError=true
+        seqtypeError_details=\$(echo "**Infered species confidence is low:**\\n")
+        seqtypeError_details=\$(echo \${seqtypeError_details}"|fastq|Infered seq type|Infered seq type confidence|\\n")
+        seqtypeError_details=\$(echo \${seqtypeError_details}"|:--|:--:|:--:|\\n")
+        seqtypeError_details=\$(echo \${seqtypeError_details}"|Read 1|\${seqtypeR1}|\${seqtypeConfidenceR1}|\\n")
+        if [ "${ends}" == "pe" ]
+        then
+          seqtypeError_details=\$(echo \${seqtypeError_details}"|Read 2|\${seqtypeR2}|\${seqtypeConfidenceR2}|\\n")
+        fi
+        echo -e "LOG: inference error: \${seqtypeError_details}" >> ${repRID}.seqwho.log
+      fi
+    fi
+
+    # check for species match error
+    if [ "${speciesMeta}" != "\${speciesInfer}" ]
+    then
+      if [ "${params.speciesForce}" != "" ]
+      then
+        speciesError=false
+        echo -e "LOG: species forced: Submitted=${speciesMeta}; Inferred=\${speciesInfer}; Forced=${params.speciesForce}" >> ${repRID}.seqwho.log
+      else
+        speciesError=true
+        echo -e "LOG: species does not match: Submitted=${speciesMeta}; Inferred=\${speciesInfer}" >> ${repRID}.seqwho.log
+      fi
+    else
+      speciesError=false
+      echo -e "LOG: species matches: Submitted=${speciesMeta}; Inferred=\${speciesInfer}" >> ${repRID}.seqwho.log
+    fi
+
+    # save seqwho multiqc report
+    echo -e "Read\tSeq Type\tSpecies\tSeq Type Confidence\tSeq Type Consensus\tSpecies Confidence" > seqwhoInfer.tsv
+    echo -e "Read 1\t\${seqtypeR1}\t\${speciesR1}\t\${seqtypeConfidenceR1}\t\${consensus}\t\${speciesConfidenceR1}" >> seqwhoInfer.tsv
+    if [ "${ends}" == "pe" ]
+    then
+      echo -e "Read 2\t\${seqtypeR2}\t\${speciesR2}\t\${seqtypeConfidenceR2}\t\${consensus}\t\${speciesConfidenceR2}" >> seqwhoInfer.tsv
+    fi
+    
+    # save species file
+    echo "\${speciesInfer}" > inferSpecies.csv
+
+    # save error file
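+    # column order must match the downstream splitCsv/separate call: seqtypeError, seqtypeError_details, speciesErrorSeqwho, speciesErrorSeqwho_details, speciesError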
+    echo "\${seqtypeError},\${seqtypeError_details},\${speciesErrorSeqwho},\${speciesErrorSeqwho_details},\${speciesError}" > inferError.csv
     """
 }
 
-// Extract calculated read length metadata into channel
-readLengthInfer = Channel.create()
-readLengthInfer_fl.splitCsv(sep: ",", header: false).separate(
-  readLengthInfer
+// Extract inferred species metadata into a channel and replicate it for multiple process inputs
+speciesInfer = Channel.create()
+inferSpecies_fl.splitCsv(sep: ",", header: false).separate(
+  speciesInfer
 )
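+// DSL1 channels are single-consumer, so the inferred species is fanned out to every process that needs it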
+speciesInfer.into {
+  speciesInfer_getRef
+  speciesInfer_alignSampleData
+  speciesInfer_checkMetadata
+  speciesInfer_aggrQC
+  speciesInfer_uploadExecutionRun
+  speciesInfer_uploadProcessedFile
+  speciesInfer_failExecutionRun
+}
 
-// Replicate inferred read length for multiple process inputs
-readLengthInfer.into {
-  readLengthInfer_aggrQC
-  readLengthInfer_uploadQC
+// Extract seq type and species errors into separate channels and replicate them for multiple process inputs
+seqtypeError = Channel.create()
+seqtypeError_details = Channel.create()
+speciesErrorSeqwho = Channel.create()
+speciesErrorSeqwho_details = Channel.create()
+speciesError = Channel.create()
+inferError_fl.splitCsv(sep: ",", header: false).separate(
+  seqtypeError,
+  seqtypeError_details,
+  speciesErrorSeqwho,
+  speciesErrorSeqwho_details,
+  speciesError
+)
+seqtypeError.into {
+  seqtypeError_trimData
+  seqtypeError_getRef
+  seqtypeError_downsampleData
+  seqtypeError_alignSampleDataERCC
+  seqtypeError_alignSampleData
+  seqtypeError_inferMetadata
+  seqtypeError_checkMetadata
+  seqtypeError_alignData
+  seqtypeError_dedupData
+  seqtypeError_makeBigWig
+  seqtypeError_countData
+  seqtypeError_dataQC
+  seqtypeError_aggrQC
+  seqtypeError_uploadExecutionRun
+  seqtypeError_uploadQC
+  seqtypeError_uploadProcessedFile
+  seqtypeError_uploadOutputBag
+  seqtypeError_finalizeExecutionRun
+  seqtypeError_uploadQC_fail
 }
-// Replicate trimmed fastq's for multiple process inputs
-fastqsTrim.into {
-  fastqsTrim_alignData
-  fastqsTrim_downsampleData
+speciesErrorSeqwho.into {
+  speciesErrorSeqwho_trimData
+  speciesErrorSeqwho_getRef
+  speciesErrorSeqwho_downsampleData
+  speciesErrorSeqwho_alignSampleDataERCC
+  speciesErrorSeqwho_alignSampleData
+  speciesErrorSeqwho_inferMetadata
+  speciesErrorSeqwho_checkMetadata
+  speciesErrorSeqwho_alignData
+  speciesErrorSeqwho_dedupData
+  speciesErrorSeqwho_makeBigWig
+  speciesErrorSeqwho_countData
+  speciesErrorSeqwho_dataQC
+  speciesErrorSeqwho_aggrQC
+  speciesErrorSeqwho_uploadExecutionRun
+  speciesErrorSeqwho_uploadQC
+  speciesErrorSeqwho_uploadProcessedFile
+  speciesErrorSeqwho_uploadOutputBag
+  speciesErrorSeqwho_finalizeExecutionRun
+  speciesErrorSeqwho_uploadQC_fail
+}
+speciesError.into {
+  speciesError_trimData
+  speciesError_getRef
+  speciesError_downsampleData
+  speciesError_alignSampleDataERCC
+  speciesError_alignSampleData
+  speciesError_inferMetadata
+  speciesError_checkMetadata
+  speciesError_alignData
+  speciesError_dedupData
+  speciesError_makeBigWig
+  speciesError_countData
+  speciesError_dataQC
+  speciesError_aggrQC
+  speciesError_uploadExecutionRun
+  speciesError_uploadQC
+  speciesError_uploadProcessedFile
+  speciesError_uploadOutputBag
+  speciesError_finalizeExecutionRun
+  speciesError_uploadQC_fail
 }
-
-// Combine inputs of getRefInfer
-getRefInferInput = referenceInfer.combine(deriva_getRefInfer.combine(script_refDataInfer.combine(fastqCountError_getRefInfer.combine(fastqReadError_getRefInfer.combine(fastqFileError_getRefInfer)))))
 
 /*
-  * getRefInfer: dowloads appropriate reference for metadata inference
-*/
-process getRefInfer {
-  tag "${refName}"
+ * getRefERCC: downloads ERCC reference for spike metadata inference
+ */
+process getRefERCC {
+  tag "${repRID}"
 
   input:
-    tuple val (refName), path (credential, stageAs: "credential.json"), path (script_refDataInfer), val (fastqCountError), val (fastqReadError), val (fastqFileError) from getRefInferInput
+    path (credential, stageAs: "credential.json") from deriva_getRefERCC
+    path script_refDataInfer
+    val fastqCountError from fastqCountError_getRefERCC
+    val fastqReadError from fastqReadError_getRefERCC
 
   output:
-    tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf")  into refInfer
-    path ("${refName}", type: 'dir') into bedInfer
+    tuple path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf")  into refERCC
 
   when:
-    fastqCountError == "false"
-    fastqReadError == "false"
-    fastqFileError == "false"
+    fastqCountError == "false" && fastqReadError == "false"
 
   script:
     """
-    hostname > ${repRID}.${refName}.getRefInfer.log
-    ulimit -a >> ${repRID}.${refName}.getRefInfer.log
+    hostname > ${repRID}.getRefERCC.log
+    ulimit -a >> ${repRID}.getRefERCC.log
 
     # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.${refName}.getRefInfer.log
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.getRefERCC.log
     mkdir -p ~/.deriva
     ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.${refName}.getRefInfer.log
+    echo -e "LOG: linked" >> ${repRID}.getRefERCC.log
 
     # set the reference name
-    if [ "${refName}" == "ERCC" ]
-    then
-      references=\$(echo ${referenceBase}/ERCC${refERCCVersion})
-    elif [ "${refName}" == "GRCm" ]
-    then
-      references=\$(echo ${referenceBase}/GRCm${refMoVersion})
-    elif [ '${refName}' == "GRCh" ]
-    then
-      references=\$(echo ${referenceBase}/GRCh${refHuVersion})
-    else
-      echo -e "LOG: ERROR - References could not be set!\nReference found: ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
-      exit 1
-    fi
+    references=\$(echo ${referenceBase}/ERCC${refERCCVersion})
 
     # retreive appropriate reference appropriate location
-    echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
+    echo -e "LOG: fetching ERCC reference files from ${referenceBase}" >> ${repRID}.getRefERCC.log
     if [ "${referenceBase}" == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
     then
       unzip \${references}.zip
       mv \$(basename \${references})/data/* .
     elif [ "${params.refSource}" == "datahub" ]
     then
-      GRCv=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f1)
-      GRCp=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f2)
-      GENCODE=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f3)
-      if [ "${refName}" != "ERCC" ]
-      then
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=false')
-      else
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${refName}${refERCCVersion}'/Annotation_Version='${refName}${refERCCVersion}'/Used_Spike_Ins=false')
-      fi
+      query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='ERCC${refERCCVersion}'/Annotation_Version='ERCC${refERCCVersion}'/Used_Spike_Ins=false')
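+      # ermrest attribute query: look up the data hub Reference_Genome record for the pinned ERCC version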
       curl --request GET \${query} > refQuery.json
       refURL=\$(python ${script_refDataInfer} --returnParam URL)
       loc=\$(dirname \${refURL})
@@ -812,15 +1082,74 @@ process getRefInfer {
     fi
     mv ./annotation/genome.gtf .
     mv ./sequence/genome.fna .
-    mkdir ${refName}
-    if [ "${refName}" != "ERCC" ]
+    echo -e "LOG: fetched" >> ${repRID}.getRefERCC.log
+    """
+}
+
+/*
+ * trimData: trims any adapter or non-host sequences from the data
+ */
+process trimData {
+  tag "${repRID}"
+
+  input:
+    path (fastq) from fastqs_trimData
+    val ends from endsManual_trimData
+    val fastqCountError from fastqCountError_trimData
+    val fastqReadError from fastqReadError_trimData
+    val fastqFileError from fastqFileError_trimData
+    val seqtypeError from seqtypeError_trimData
+    val speciesErrorSeqwho from speciesErrorSeqwho_trimData
+    val speciesError from speciesError_trimData
+
+  output:
+    path ("*.fq.gz") into fastqsTrim
+    path ("*_trimming_report.txt") into trimQC
+    path ("readLength.csv") into readLengthInfer_fl
+
+  when:
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
+
+  script:
+    """
+    hostname > ${repRID}.trimData.log
+    ulimit -a >> ${repRID}.trimData.log
+    echo -e "LOG: fastqFileError: ${fastqFileError}" >> ${repRID}.trimData.log
+
+    # trim fastq's using trim_galore and extract median read length
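+    # the awk pipeline extracts each read length (every 4th line, offset 2), sorts numerically, and reports the median (middle value, or mean of the two middle values)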
+    echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
+    if [ "${ends}" == "se" ]
+    then
+      trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
+      readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
+    elif [ "${ends}" == "pe" ]
     then
-      mv ./annotation/genome.bed ./${refName}
+      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
+      readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     fi
-    echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log
+    echo -e "LOG: trimmed" >> ${repRID}.trimData.log
+    echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log
+
+    # save read length file
+    echo "\${readLength}" > readLength.csv
     """
 }
 
+// Extract calculated read length metadata into channel and replicate it for multiple process inputs
+readLengthInfer = Channel.create()
+readLengthInfer_fl.splitCsv(sep: ",", header: false).separate(
+  readLengthInfer
+)
+readLengthInfer.into {
+  readLengthInfer_aggrQC
+  readLengthInfer_uploadQC
+}
+// Replicate trimmed fastq's for multiple process inputs
+fastqsTrim.into {
+  fastqsTrim_downsampleData
+  fastqsTrim_alignData
+}
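+// (DSL1 idiom used throughout: fl.splitCsv(sep: ",").separate(a, b) parses each
+// CSV row into positional value channels, and chan.into { x; y } duplicates a
+// channel so each downstream process consumes its own copy; names here are
+// illustrative only)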
+
 /*
  * downsampleData: downsample fastq's for metadata inference
  */
@@ -830,18 +1159,18 @@ process downsampleData {
   input:
     path fastq from fastqsTrim_downsampleData
     val ends from endsManual_downsampleData
-    val fastqCountError_downsampleData
-    val fastqReadError_downsampleData
-    val fastqFileError_downsampleData
+    val fastqCountError from fastqCountError_downsampleData
+    val fastqReadError from fastqReadError_downsampleData
+    val fastqFileError from fastqFileError_downsampleData
+    val seqtypeError from seqtypeError_downsampleData
+    val speciesErrorSeqwho from speciesErrorSeqwho_downsampleData
+    val speciesError from speciesError_downsampleData
 
   output:
-    path ("sampled.1.fq") into fastqs1Sample
-    path ("sampled.2.fq") into fastqs2Sample
+    path ("sampled.{1,2}.fq") into fastqsSample
 
   when:
-    fastqCountError_downsampleData == "false"
-    fastqReadError_downsampleData == "false"
-    fastqFileError_downsampleData == "false"
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
 
   script:
     """
@@ -864,242 +1193,366 @@ process downsampleData {
     """
 }
 
-// Replicate the dowsampled fastq's and attatched to the references
-inferInput = endsManual_alignSampleData.combine(refInfer.combine(fastqs1Sample.collect().combine(fastqs2Sample.collect().combine(fastqCountError_alignSampleData.combine(fastqReadError_alignSampleData.combine(fastqFileError_alignSampleData))))))
+// Replicate sampled fastq's for multiple process inputs
+fastqsSample.into {
+  fastqsSample_alignSampleDataERCC
+  fastqsSample_alignSampleData
+}
 
 /*
- * alignSampleData: aligns the downsampled reads to a reference database
-*/
-process alignSampleData {
-  tag "${ref}"
+ * alignSampleDataERCC: aligns the downsampled reads to the ERCC reference and infers spike-in
+ */
+process alignSampleDataERCC {
+  tag "${repRID}"
 
   input:
-    tuple val (ends), val (ref), path (hisat2), path (fna), path (gtf), path (fastq1), path (fastq2), val (fastqCountError), val (fastqReadError), val (fastqFileError) from inferInput
+    val ends from endsManual_alignSampleDataERCC
+    tuple path (hisat2), path (fna), path (gtf) from refERCC
+    path fastq from fastqsSample_alignSampleDataERCC
+    val spikeForce
+    val fastqCountError from fastqCountError_alignSampleDataERCC
+    val fastqReadError from fastqReadError_alignSampleDataERCC
+    val fastqFileError from fastqFileError_alignSampleDataERCC
+    val seqtypeError from seqtypeError_alignSampleDataERCC
+    val speciesErrorSeqwho from speciesErrorSeqwho_alignSampleDataERCC
+    val speciesError from speciesError_alignSampleDataERCC
 
   output:
-    path ("${ref}.sampled.sorted.bam") into sampleBam
-    path ("${ref}.sampled.sorted.bam.bai") into sampleBai
-    path ("${ref}.alignSampleSummary.txt") into alignSampleQC
+    path "inferSpike.csv" into inferSpike_fl
+    path ("ERCC.alignSampleSummary.txt") into alignSampleQC_ERCC
 
   when:
-    fastqCountError == "false"
-    fastqReadError == "false"
-    fastqFileError == "false"
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesError == "false"
 
   script:
     """
-    hostname > ${repRID}.${ref}.alignSampleData.log
-    ulimit -a >> ${repRID}.${ref}.alignSampleData.log
+    hostname > ${repRID}.alignSampleDataERCC.log
+    ulimit -a >> ${repRID}.alignSampleDataERCC.log
 
     # align the reads with Hisat2
-    echo -e "LOG: aligning ${ends}" >> ${repRID}.${ref}.alignSampleData.log
+    echo -e "LOG: aligning ${ends}" >> ${repRID}.alignSampleDataERCC.log
     if [ "${ends}" == "se" ]
     then
 
-      hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary
+      hisat2 -p `nproc` --add-chrname -S ERCC.sampled.sam -x hisat2/genome -U ${fastq[0]} --summary-file ERCC.alignSampleSummary.txt --new-summary
     elif [ "${ends}" == "pe" ]
     then
-      hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary
+      hisat2 -p `nproc` --add-chrname -S ERCC.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ERCC.alignSampleSummary.txt --new-summary
     fi
-    echo -e "LOG: aliged" >> ${repRID}.${ref}.alignSampleData.log
+    echo -e "LOG: aliged" >> ${repRID}.alignSampleDataERCC.log
 
     # convert the output sam file to a sorted bam file using Samtools
-    echo -e "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log
-    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam
+    echo -e "LOG: converting from sam to bam" >> ${repRID}.alignSampleDataERCC.log
+    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ERCC.sampled.bam ERCC.sampled.sam
 
     # sort the bam file using Samtools
-    echo -e "LOG: sorting the bam file" >> ${repRID}.${ref}.alignSampleData.log
+    echo -e "LOG: sorting the bam file" >> ${repRID}.alignSampleDataERCC.log
     proc=\$(expr `nproc` - 1)
     mem=\$(vmstat -s -S K | grep 'total memory' | grep -o '[0-9]*')
     mem=\$(expr \${mem} / \${proc} \\* 85 / 100)
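+    # (per-thread sort memory: an even share of total memory across nproc-1
+    # threads, scaled down to 85% for headroom)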
-    samtools sort -@ \${proc} -m \${mem}K -O BAM -o ${ref}.sampled.sorted.bam ${ref}.sampled.bam
+    samtools sort -@ \${proc} -m \${mem}K -O BAM -o ERCC.sampled.sorted.bam ERCC.sampled.bam
 
     # index the sorted bam using Samtools
-    echo -e "LOG: indexing sorted bam file" >> ${repRID}.${ref}.alignSampleData.log
-    samtools index -@ `nproc` -b ${ref}.sampled.sorted.bam ${ref}.sampled.sorted.bam.bai
+    echo -e "LOG: indexing sorted bam file" >> ${repRID}.alignSampleDataERCC.log
+    samtools index -@ `nproc` -b ERCC.sampled.sorted.bam ERCC.sampled.sorted.bam.bai
+
+    # collect alignment rates (round down to integers)
+    align=\$(echo \$(grep "Overall alignment rate" ERCC.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
+    align=\$(echo \${align%.*})
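+    # (parses e.g. "Overall alignment rate: 92.35%" from the hisat2 summary;
+    # \${align%.*} drops the fractional part, leaving the integer 92)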
+    echo -e "LOG: alignment rate to ERCC: \${align}" >> ${repRID}.alignSampleDataERCC.log
+
+    # determine spike-in
+    if [ 1 -eq \$(echo \$(expr \${align} ">=" 10)) ]
+    then
+      spike="true"
+    else
+      spike="false"
+    fi
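+    # (expr prints 1 when the comparison holds, so the test reads: spike-in is
+    # present when at least 10% of the sampled reads align to ERCC)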
+    echo -e "LOG: inference of strandedness results is: \${spike}" >> ${repRID}.alignSampleDataERCC.log
+    if [ "${spikeForce}" != "" ]
+    then
+      spike=${spikeForce}
+      echo -e "LOG: spike-in metadata forced: \${spike}" >> ${repRID}.alignSampleDataERCC.log
+    fi
+
+    # write inferred spike metadata to file
+    echo "\${spike},\${align}" > inferSpike.csv
     """
 }
 
-alignSampleQC.into {
-  alignSampleQC_inferMetadata
-  alignSampleQC_aggrQC
+// Extract spike-in metadata and % aligned to ERCC into channels and replicate them for multiple process inputs
+spikeInfer = Channel.create()
+alignInferERCC = Channel.create()
+inferSpike_fl.splitCsv(sep: ",", header: false).separate(
+  spikeInfer,
+  alignInferERCC
+)
+spikeInfer.into {
+  spikeInfer_getRef
+  spikeInfer_checkMetadata
+  spikeInfer_aggrQC
+  spikeInfer_uploadExecutionRun
+  spikeInfer_failExecutionRun
 }
 
+/*
+ * getRef: downloads the appropriate reference
+ */
+process getRef {
+  tag "${species}"
+
+  input:
+    path script_refData
+    path credential, stageAs: "credential.json" from deriva_getRef
+    val spike from spikeInfer_getRef
+    val species from speciesInfer_getRef
+    val fastqCountError from fastqCountError_getRef
+    val fastqReadError from fastqReadError_getRef
+    val fastqFileError from fastqFileError_getRef
+    val seqtypeError from seqtypeError_getRef
+    val speciesErrorSeqwho from speciesErrorSeqwho_getRef
+    val speciesError from speciesError_getRef
+
+  output:
+    tuple path ("hisat2", type: 'dir'), path ("*.bed"), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv")  into reference
+
+  when:
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
+
+  script:
+    """
+    hostname > ${repRID}.getRef.log
+    ulimit -a >> ${repRID}.getRef.log
+
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.getRef.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.getRef.log
+
+    # set the reference name
+    if [ "${species}" == "Mus musculus" ]
+    then
+      reference=\$(echo ${referenceBase}/GRCm${refMoVersion})
+      refName=GRCm
+    elif [ '${species}' == "Homo sapiens" ]
+    then
+      reference=\$(echo ${referenceBase}/GRCh${refHuVersion})
+      refName=GRCh
+    else
+      echo -e "LOG: ERROR - References could not be set!\nSpecies reference found: ${species}" >> ${repRID}.getRef.log
+      exit 1
+    fi
+    if [ "${spike}" == "true" ]
+    then
+      reference=\$(echo \${reference}-S)
+    elif [ "${spike}" == "false" ]
+    then
+      reference=\$(echo \${reference})
+    fi
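+    # (the resulting name is e.g. GRCm${refMoVersion} or GRCh${refHuVersion},
+    # with "-S" appended when ERCC spike-ins are present)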
+    echo -e "LOG: species set to \${reference}" >> ${repRID}.getRef.log
+
+    # retrieve the appropriate reference from the appropriate location
+    echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log
+    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
+    then
+      echo -e "LOG: grabbing reference files from local (BioHPC)" >> ${repRID}.getRef.log
+      unzip \${reference}.zip
+      mv \$(basename \${reference})/data/* .
+    elif [ "${params.refSource}" == "datahub" ]
+    then
+      echo -e "LOG: grabbing reference files from datahub" >> ${repRID}.getRef.log
+      GRCv=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f1)
+      GRCp=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f2)
+      GENCODE=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f3)
+      if [ "${spike}" == "true" ]
+      then
+        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=true')
+      else
+        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=false')
+      fi
+      curl --request GET \${query} > refQuery.json
+      refURL=\$(python ${script_refData} --returnParam URL)
+      loc=\$(dirname \${refURL})
+      fName=\$(python ${script_refData} --returnParam fName)
+      fName=\${fName%.*}
+      if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
+      filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
+      deriva-hatrac-cli --host ${referenceBase} get \${refURL}
+      unzip \$(basename \${refURL})
+      mv \${fName}/data/* .
+    fi
+    echo -e "LOG: fetched" >> ${repRID}.getRef.log
+
+    mv ./annotation/genome.gtf .
+    mv ./sequence/genome.fna .
+    mv ./annotation/genome.bed .
+    mv ./metadata/Entrez.tsv .
+    mv ./metadata/geneID.tsv .
+    """
+}
+
+// Replicate reference for multiple process inputs
+reference.into {
+  reference_alignSampleData
+  reference_inferMetadata
+  reference_alignData
+  reference_countData
+  reference_dataQC
+}
+
+/*
+ * alignSampleData: aligns the downsampled reads to the appropriate species reference
+ */
+process alignSampleData {
+  tag "${repRID}"
+
+  input:
+    path fastqSample from fastqsSample_alignSampleData
+    path reference_alignSampleData
+    val endsManual from endsManual_alignSampleData
+    val speciesInfer from speciesInfer_alignSampleData
+    val fastqCountError from fastqCountError_alignSampleData
+    val fastqReadError from fastqReadError_alignSampleData
+    val fastqFileError from fastqFileError_alignSampleData
+    val seqtypeError from seqtypeError_alignSampleData
+    val speciesErrorSeqwho from speciesErrorSeqwho_alignSampleData
+    val speciesError from speciesError_alignSampleData
+
+  output:
+    path ("sampled.bam") into sampledBam
+    path "align.csv" into align_fl
+    path ("*.alignSampleSummary.txt") into alignSampleQC
+
+  when:
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
+
+  script:
+    """
+    hostname > ${repRID}.alignSampleData.log
+    ulimit -a >> ${repRID}.alignSampleData.log
+
+    # align the sampled reads with Hisat2
+    species="${speciesInfer}"
+    species=\${species// /_}
+    echo -e "LOG: aligning ${endsManual}" >> ${repRID}.alignSampleData.log
+    if [ "${endsManual}" == "se" ]
+    then
+      hisat2 -p `nproc` --add-chrname -S sampled.sam -x hisat2/genome -U ${fastqSample[0]} --summary-file \${species}.alignSampleSummary.txt --new-summary
+    elif [ "${endsManual}" == "pe" ]
+    then
+      hisat2 -p `nproc` --add-chrname -S sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastqSample[0]} -2 ${fastqSample[1]} --summary-file \${species}.alignSampleSummary.txt --new-summary
+    fi
+    echo -e "LOG: aligned sampled reads" >> ${repRID}.alignSampleData.log
+
+    # collect alignment rates (round down to integers)
+    align=\$(echo \$(grep "Overall alignment rate" \${species}.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
+    align=\$(echo \${align%.*})
+
+    # convert the sampled read output sam file to a sorted bam file using Samtools
+    echo -e "LOG: converting sampled reads from sam to bam" >> ${repRID}.alignSampleData.log
+    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o sampled.bam sampled.sam
+
+    echo "\${align}" > align.csv
+    """
+}
+
+// Extract % aligned to the appropriate reference into channel
+alignInfer = Channel.create()
+align_fl.splitCsv(sep: ",", header: false).separate(
+  alignInfer
+)
+
+/*
+ * inferMetadata: infers strandedness and endness from the aligned downsampled reads
+ */
 process inferMetadata {
   tag "${repRID}"
 
   input:
+    path sampledBam
+    path reference_inferMetadata
     path script_inferMeta
-    path beds from bedInfer.collect()
-    path bam from sampleBam.collect()
-    path bai from sampleBai.collect()
-    path alignSummary from alignSampleQC_inferMetadata.collect()
+    val endsForce
     val strandedForce
-    val spikeForce
-    val fastqCountError_inferMetadata
-    val fastqReadError_inferMetadata
-    val fastqFileError_inferMetadata
+    val fastqCountError from fastqCountError_inferMetadata
+    val fastqReadError from fastqReadError_inferMetadata
+    val fastqFileError from fastqFileError_inferMetadata
+    val seqtypeError from seqtypeError_inferMetadata
+    val speciesErrorSeqwho from speciesErrorSeqwho_inferMetadata
+    val speciesError from speciesError_inferMetadata
 
   output:
     path "infer.csv" into inferMetadata_fl
     path "${repRID}.infer_experiment.txt" into inferExperiment
-    path "speciesError.csv" into speciesError_fl
 
   when:
-    fastqCountError_inferMetadata == "false"
-    fastqReadError_inferMetadata == "false"
-    fastqFileError_inferMetadata == "false"
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
 
   script:
     """
     hostname > ${repRID}.inferMetadata.log
     ulimit -a >> ${repRID}.inferMetadata.log
 
-    # collect alignment rates (round down to integers)
-    align_ercc=\$(echo \$(grep "Overall alignment rate" ERCC.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
-    align_ercc=\$(echo \${align_ercc%.*})
-    echo -e "LOG: alignment rate to ERCC: \${align_ercc}" >> ${repRID}.inferMetadata.log
-    align_hu=\$(echo \$(grep "Overall alignment rate" GRCh.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
-    align_hu=\$(echo \${align_hu%.*})
-    echo -e "LOG: alignment rate to GRCh: \${align_hu}" >> ${repRID}.inferMetadata.log
-    align_mo=\$(echo \$(grep "Overall alignment rate" GRCm.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
-    align_mo=\$(echo \${align_mo%.*})
-    echo -e "LOG: alignment rate to GRCm: \${align_mo}" >> ${repRID}.inferMetadata.log
+    # infer experimental setting from sampled bam
+    echo -e "LOG: infer experimental setting from bam" >> ${repRID}.inferMetadata.log
+    infer_experiment.py -r ./genome.bed -i ${sampledBam} 1>> ${repRID}.infer_experiment.txt
+    echo -e "LOG: inferred" >> ${repRID}.inferMetadata.log
 
-    # determine spike-in
-    if [ 1 -eq \$(echo \$(expr \${align_ercc} ">=" 10)) ]
+    ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
+    fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
+    if [ \${ended} == "PairEnd" ]
     then
-      spike="true"
-    else
-      spike="false"
-    fi
-    echo -e "LOG: inference of strandedness results is: \${spike}" >> ${repRID}.inferMetadata.log
-    if [ "${spikeForce}" != "" ]
+      ends="pe"
+      percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
+      percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
+    elif [ \${ended} == "SingleEnd" ]
     then
-      spike=${spikeForce}
-      echo -e "LOG: spike-in metadata forced: \${spike}" >> ${repRID}.parseMetadata.log
+      ends="se"
+      percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
+      percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
     fi
-
-    speciesError=false
-    speciesError_details=""
-    # determine species
-    if [ 1 -eq \$(echo \$(expr \${align_hu} ">=" 40)) ] && [ 1 -eq \$(echo \$(expr \${align_mo} "<" 40)) ]
+    echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
+    echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
+    if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
     then
-      species="Homo sapiens"
-      bam="GRCh.sampled.sorted.bam"
-      bed="./GRCh/genome.bed"
-      echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
-    elif [ 1 -eq \$(echo \$(expr \${align_mo} ">=" 40)) ] && [ 1 -eq \$(echo \$(expr \${align_hu} "<" 40)) ]
+      stranded="forward"
+    elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
     then
-      species="Mus musculus"
-      bam="GRCm.sampled.sorted.bam"
-      bed="./GRCm/genome.bed"
-      echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
+      stranded="reverse"
     else
-      echo -e "LOG: ERROR - inference of species returns an ambiguous result: hu=\${align_hu} mo=\${align_mo}" >> ${repRID}.inferMetadata.log
-      if [ "${speciesForce}" == "" ]
-      then
-        speciesError=true
-        speciesError_details="**Inference of species returns an ambiguous result:** Percent aligned to human = \${align_hu} and percent aligned to mouse = \${align_mo}"
-      fi
+      stranded="unstranded"
     fi
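+    # (\${percentF#*.} keeps only the digits after the decimal point, so a
+    # fraction such as 0.2501 compares as the integer 2501 against the 2500
+    # cutoff, i.e. a 0.25 threshold)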
-    if [ "${speciesForce}" != "" ]
+    echo -e "LOG: ends set to: \${ends}" >> ${repRID}.inferMetadata.log
+    if [ "${endsForce}" != "" ]
     then
-      speciesError=false
-      echo -e "LOG: species overridden to: ${speciesForce}"
-      species="${speciesForce}"
-      if [ "${speciesForce}" == "Homo sapiens" ]
-      then
-        bam="GRCh.sampled.sorted.bam"
-        bed="./GRCh/genome.bed"
-      elif [ "${speciesForce}" == "Mus musculus" ]
-      then
-        bam="GRCm.sampled.sorted.bam"
-        bed="./GRCm/genome.bed"
-      fi
+      ends=${endsForce}
+      echo -e "LOG: ends metadata forced: \${ends}" >> ${repRID}.inferMetadata.log
     fi
-
-    if [ "\${speciesError}" == false ]
+    echo -e "LOG: stradedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
+    if [ "${strandedForce}" != "" ]
     then
-      # infer experimental setting from dedup bam
-      echo -e "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.log
-      infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.infer_experiment.txt
-      echo -e "LOG: inferred" >> ${repRID}.inferMetadata.log
-
-      ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
-      fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
-      if [ \${ended} == "PairEnd" ]
-      then
-        ends="pe"
-        percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
-        percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
-      elif [ \${ended} == "SingleEnd" ]
-      then
-        ends="se"
-        percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
-        percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
-      fi
-      echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
-      echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
-      if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
-      then
-        stranded="forward"
-      elif [ 1 -eq \$(echo \$(expr \${percentR#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentF#*.} "<" 2500)) ]
-      then
-        stranded="reverse"
-      else
-        stranded="unstranded"
-      fi
-      echo -e "LOG: stradedness set to: \${stranded}" >> ${repRID}.inferMetadata.log
-      if [ "${strandedForce}" != "" ]
-      then
-        stranded=${strandedForce}
-        echo -e "LOG: spike-in metadata forced: \${stranded}" >> ${repRID}.inferMetadata.log
-      fi
-    else
-      ends=""
-      stranded=""
-      spike=""
-      species=""
-      percentF=""
-      percentR=""
-      fail=""
-      touch ${repRID}.infer_experiment.txt
+      stranded=${strandedForce}
+      echo -e "LOG: spike-in metadata forced: \${stranded}" >> ${repRID}.inferMetadata.log
     fi
 
     # write inferred metadata to file
-    echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" > infer.csv
-
-    # save species error file
-    echo "\${speciesError},\${speciesError_details}" > speciesError.csv
+    echo "\${ends},\${stranded},\${percentF},\${percentR},\${fail}" > infer.csv
     """
 }
 
-// Split metadata into separate channels
+// Extract metadata and replicate them for multiple process inputs
 endsInfer = Channel.create()
 strandedInfer = Channel.create()
-spikeInfer = Channel.create()
-speciesInfer = Channel.create()
-align_erccInfer = Channel.create()
-align_huInfer = Channel.create()
-align_moInfer = Channel.create()
 percentFInfer = Channel.create()
 percentRInfer = Channel.create()
 failInfer = Channel.create()
 inferMetadata_fl.splitCsv(sep: ",", header: false).separate(
   endsInfer,
   strandedInfer,
-  spikeInfer,
-  speciesInfer,
-  align_erccInfer,
-  align_huInfer,
-  align_moInfer,
   percentFInfer,
   percentRInfer,
   failInfer
 )
-
-// Replicate metadata for multiple process inputs
 endsInfer.into {
   endsInfer_checkMetadata
   endsInfer_alignData
@@ -1117,52 +1570,10 @@ strandedInfer.into {
   strandedInfer_uploadQC
   strandedInfer_failExecutionRun
 }
-spikeInfer.into{
-  spikeInfer_checkMetadata
-  spikeInfer_getRef
-  spikeInfer_aggrQC
-  spikeInfer_uploadExecutionRun
-  spikeInfer_failExecutionRun
-}
-speciesInfer.into {
-  speciesInfer_checkMetadata
-  speciesInfer_getRef
-  speciesInfer_aggrQC
-  speciesInfer_uploadExecutionRun
-  speciesInfer_uploadProcessedFile
-  speciesInfer_failExecutionRun
-}
-
-// Split species count error into separate channel
-speciesError = Channel.create()
-speciesError_details = Channel.create()
-speciesError_fl.splitCsv(sep: ",", header: false).separate(
-  speciesError,
-  speciesError_details
-)
-
-//  Replicate errors for multiple process inputs
-speciesError.into {
-  speciesError_checkMetadata
-  speciesError_uploadExecutionRun
-  speciesError_getRef
-  speciesError_alignData
-  speciesError_dedupData
-  speciesError_makeBigWig
-  speciesError_countData
-  speciesError_fastqc
-  speciesError_dataQC
-  speciesError_aggrQC
-  speciesError_uploadQC
-  speciesError_uploadQC_fail
-  speciesError_uploadProcessedFile
-  speciesError_uploadOutputBag
-  speciesError_failPreExecutionRun_species
-}
 
 /* 
- * checkMetadata: checks the submitted metada against inferred
-*/
+ * checkMetadata: checks the submitted metadata against the inferred values
+ */
 process checkMetadata {
   tag "${repRID}"
 
@@ -1175,20 +1586,19 @@ process checkMetadata {
     val strandedInfer from strandedInfer_checkMetadata
     val spikeInfer from spikeInfer_checkMetadata
     val speciesInfer from speciesInfer_checkMetadata
-    val fastqCountError_checkMetadata
-    val fastqReadError_checkMetadata
-    val fastqFileError_checkMetadata
-    val speciesError_checkMetadata
+    val fastqCountError from fastqCountError_checkMetadata
+    val fastqReadError from fastqReadError_checkMetadata
+    val fastqFileError from fastqFileError_checkMetadata
+    val seqtypeError from seqtypeError_checkMetadata
+    val speciesErrorSeqwho from speciesErrorSeqwho_checkMetadata
+    val speciesError from speciesError_checkMetadata
 
   output:
     path ("check.csv") into checkMetadata_fl
     path ("outputBagRID.csv") optional true into outputBagRID_fl_dummy
 
   when:
-    fastqCountError_checkMetadata == "false"
-    fastqReadError_checkMetadata == "false"
-    fastqFileError_checkMetadata == "false"
-    speciesError_checkMetadata == "false"
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
 
   script:
     """
@@ -1232,378 +1642,77 @@ process checkMetadata {
     fi
     if [ "${endsMeta}" != "${endsInfer}" ]
     then
-      pipelineError=true
-      pipelineError_ends=true
-      echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
-    else
-      pipelineError_ends=false
-      echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
-    fi
-    if [ "${spikeMeta}" != "${spikeInfer}" ]
-    then
-      if [[ "${params.spikeForce}" != "" ]]
+      if [ "${params.endsForce}" != "" ]
       then
-        pipelineError_spike=false
-        echo -e "LOG: spike forced: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
-      else
-        pipelineError=true
-        pipelineError_spike=true
-        echo -e "LOG: spike does not match: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
-      fi
-    else
-      pipelineError_spike=false
-      echo -e "LOG: spike matches: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
-    fi
-    if [ "${speciesMeta}" != "${speciesInfer}" ]
-    then
-    if [[ "${params.speciesForce}" != "" ]]
-      then
-        pipelineError_species=false
-        echo -e "LOG: species forced: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
+        pipelineError_ends=false
+        echo -e "LOG: ends forced: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
       else
         pipelineError=true
-        pipelineError_species=true
-        echo -e "LOG: species does not match: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
+        pipelineError_ends=true
+        echo -e "LOG: ends do not match: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
       fi
     else
-      pipelineError_species=false
-      echo -e "LOG: species matches: Submitted=${speciesMeta}; Inferred=${speciesInfer}" >> ${repRID}.checkMetadata.log
-    fi
-
-    # create dummy output bag rid if failure
-    if [ \${pipelineError} == true ]
-    then
-      echo "fail" > outputBagRID.csv
-    fi
-
-    # write checks to file
-    echo "\${pipelineError},\${pipelineError_ends},\${pipelineError_stranded},\${pipelineError_spike},\${pipelineError_species}" > check.csv
-    """
-}
-
-// Split errors into separate channels
-pipelineError = Channel.create()
-pipelineError_ends = Channel.create()
-pipelineError_stranded = Channel.create()
-pipelineError_spike = Channel.create()
-pipelineError_species = Channel.create()
-checkMetadata_fl.splitCsv(sep: ",", header: false).separate(
-  pipelineError,
-  pipelineError_ends,
-  pipelineError_stranded,
-  pipelineError_spike,
-  pipelineError_species
-)
-
-// Replicate errors for multiple process inputs
-pipelineError.into {
-  pipelineError_getRef
-  pipelineError_alignData
-  pipelineError_dedupData
-  pipelineError_makeBigWig
-  pipelineError_countData
-  pipelineError_fastqc
-  pipelineError_dataQC
-  pipelineError_aggrQC
-  pipelineError_uploadQC
-  pipelineError_uploadQC_fail
-  pipelineError_uploadProcessedFile
-  pipelineError_uploadOutputBag
-  pipelineError_failExecutionRun
-}
-
-/* 
- * uploadInputBag: uploads the input bag
-*/
-process uploadInputBag {
-  tag "${repRID}"
-
-  input:
-    path script_uploadInputBag
-    path credential, stageAs: "credential.json" from deriva_uploadInputBag
-    path inputBag from inputBag_uploadInputBag
-    val studyRID from studyRID_uploadInputBag
-
-  output:
-    path ("inputBagRID.csv") into inputBagRID_fl
-
-  when:
-    upload
-
-  script:
-    """
-    hostname > ${repRID}.uploadInputBag.log
-    ulimit -a >> ${repRID}.uploadInputBag.log
-
-    # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadInputBag.log
-    mkdir -p ~/.deriva
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.uploadInputBag.log
-
-    yr=\$(date +'%Y')
-    mn=\$(date +'%m')
-    dy=\$(date +'%d')
-
-    file=\$(basename -a ${inputBag})
-    md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
-    echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
-    size=\$(wc -c < ./\${file})
-    echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log
-    
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
-    if [ "\${exist}" == "[]" ]
-    then
-        cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-        cookie=\${cookie:11:-1}
-
-        loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
-        inputBag_rid=\$(python3 ${script_uploadInputBag} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
-        echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
-        rid=\${inputBag_rid}
-    else
-        exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-        exist=\${exist:7:-6}
-        echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
-        rid=\${exist}
-    fi
-
-    echo "\${rid}" > inputBagRID.csv
-    """
-}
-
-// Extract input bag RID into channel
-inputBagRID = Channel.create()
-inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
-  inputBagRID
-)
-
-// Replicate input bag RID for multiple process inputs
-inputBagRID.into {
-  inputBagRID_uploadExecutionRun
-  inputBagRID_finalizeExecutionRun
-  inputBagRID_failPreExecutionRun
-  inputBagRID_failExecutionRun
-}
-
-/* 
- * uploadExecutionRun: uploads the execution run
-*/
-process uploadExecutionRun {
-  tag "${repRID}"
-
-  input:
-    path script_uploadExecutionRun_uploadExecutionRun
-    path credential, stageAs: "credential.json" from deriva_uploadExecutionRun
-    val spike from spikeInfer_uploadExecutionRun
-    val species from speciesInfer_uploadExecutionRun
-    val inputBagRID from inputBagRID_uploadExecutionRun
-    val fastqCountError_uploadExecutionRun
-    val fastqReadError_uploadExecutionRun
-    val fastqFileError_uploadExecutionRun
-    val speciesError_uploadExecutionRun
-    
-  output:
-    path ("executionRunRID.csv") into executionRunRID_fl
-
-  when:
-    upload
-    fastqCountError_uploadExecutionRun == "false"
-    fastqReadError_uploadExecutionRun == "false"
-    fastqFileError_uploadExecutionRun == "false"
-    speciesError_uploadExecutionRun == "false"
-
-  script:
-    """
-    hostname > ${repRID}.uploadExecutionRun.log
-    ulimit -a >> ${repRID}.uploadExecutionRun.log
-
-    # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadExecutionRun.log
-    mkdir -p ~/.deriva
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.uploadExecutionRun.log
-
-    echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
-    workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
-    workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    workflow=\${workflow:7:-6}
-    echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.uploadExecutionRun.log
-
-    if [ "${species}" == "Homo sapiens" ]
-    then
-      genomeName=\$(echo GRCh${refHuVersion})
-    elif [ "${species}" == "Mus musculus" ]
-    then
-      genomeName=\$(echo GRCm${refMoVersion})
-    fi
-    if [ "${spike}" == "true" ]
-    then
-      genomeName=\$(echo \${genomeName}-S)
-    fi
-    echo LOG: searching for genome name - \${genomeName} >> ${repRID}.uploadExecutionRun.log
-    genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
-    genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-    genome=\${genome:7:-6}
-    echo LOG: genome RID extracted - \${genome} >> ${repRID}.uploadExecutionRun.log
-
-    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
-    cookie=\${cookie:11:-1}
-
-    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
-    echo \${exist} >> ${repRID}.uploadExecutionRun.log
-    if [ "\${exist}" == "[]" ]
-    then
-      executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u F)
-      echo LOG: execution run RID uploaded - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
-    else
-      rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
-      rid=\${rid:7:-6}
-      echo \${rid} >> ${repRID}.uploadExecutionRun.log
-      executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u \${rid})
-      echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
-    fi
-
-    echo "\${executionRun_rid}" > executionRunRID.csv
-
-    if [ ${params.track} == true ]
-    then
-      curl -H 'Content-Type: application/json' -X PUT -d \
-        '{ \
-          "ID": "${workflow.sessionId}", \
-          "ExecutionRunRID": "'\${executionRun_rid}'" \
-        }' \
-        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
-    fi
-    """
-}
-
-// Extract execution run RID into channel
-executionRunRID = Channel.create()
-executionRunRID_fl.splitCsv(sep: ",", header: false).separate(
-  executionRunRID
-)
-
-// Replicate execution run RID for multiple process inputs
-executionRunRID.into {
-  executionRunRID_uploadQC
-  executionRunRID_uploadProcessedFile
-  executionRunRID_uploadOutputBag
-  executionRunRID_finalizeExecutionRun
-  executionRunRID_failExecutionRun
-  executionRunRID_fail
-}
-
-/*
-  * getRef: downloads appropriate reference
-*/
-process getRef {
-  tag "${species}"
-
-  input:
-    path script_refData
-    path credential, stageAs: "credential.json" from deriva_getRef
-    val spike from spikeInfer_getRef
-    val species from speciesInfer_getRef
-    val fastqCountError_getRef
-    val fastqReadError_getRef
-    val fastqFileError_getRef
-    val speciesError_getRef
-    val pipelineError_getRef
-
-  output:
-    tuple path ("hisat2", type: 'dir'), path ("*.bed"), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv")  into reference
-
-  when:
-    fastqCountError_getRef == "false"
-    fastqReadError_getRef == "false"
-    fastqFileError_getRef == "false"
-    speciesError_getRef == "false"
-    pipelineError_getRef == "false"
-
-  script:
-    """
-    hostname > ${repRID}.getRef.log
-    ulimit -a >> ${repRID}.getRef.log
-
-    # link credential file for authentication
-    echo -e "LOG: linking deriva credentials" >> ${repRID}.getRef.log
-    mkdir -p ~/.deriva
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
-    echo -e "LOG: linked" >> ${repRID}.getRef.log
-
-    # set the reference name
-    if [ "${species}" == "Mus musculus" ]
-    then
-      reference=\$(echo ${referenceBase}/GRCm${refMoVersion})
-      refName=GRCm
-    elif [ '${species}' == "Homo sapiens" ]
-    then
-      reference=\$(echo ${referenceBase}/GRCh${refHuVersion})
-      refName=GRCh
-    else
-      echo -e "LOG: ERROR - References could not be set!\nSpecies reference found: ${species}" >> ${repRID}.getRef.log
-      exit 1
-    fi
-    if [ "${spike}" == "true" ]
-    then
-      reference=\$(echo \${reference}-S)
-    elif [ "${spike}" == "false" ]
-    then
-      reference=\$(echo \${reference})
+      pipelineError_ends=false
+      echo -e "LOG: ends matches: Submitted=${endsMeta}; Inferred=${endsInfer}" >> ${repRID}.checkMetadata.log
     fi
-    echo -e "LOG: species set to \${reference}" >> ${repRID}.getRef.log
-
-    # retreive appropriate reference appropriate location
-    echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log
-    if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references/new" ]
-    then
-      echo -e "LOG: grabbing reference files from local (BioHPC)" >> ${repRID}.getRef.log
-      unzip \${reference}.zip
-      mv \$(basename \${reference})/data/* .
-    elif [ ${params.refSource} == "datahub" ]
+    if [ "${spikeMeta}" != "${spikeInfer}" ]
     then
-      echo -e "LOG: grabbing reference files from datahub" >> ${repRID}.getRef.log
-      GRCv=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f1)
-      GRCp=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f2)
-      GENCODE=\$(echo \${reference} | grep -o \${refName}.* | cut -d '.' -f3)
-      if [ "${spike}" == "true" ]
+      if [[ "${params.spikeForce}" != "" ]]
       then
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=true')
+        pipelineError_spike=false
+        echo -e "LOG: spike forced: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
       else
-        query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE}'/Used_Spike_Ins=false')
+        pipelineError=true
+        pipelineError_spike=true
+        echo -e "LOG: spike does not match: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
       fi
-      curl --request GET \${query} > refQuery.json
-      refURL=\$(python ${script_refData} --returnParam URL)
-      loc=\$(dirname \${refURL})
-      fName=\$(python ${script_refData} --returnParam fName)
-      fName=\${fName%.*}
-      if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
-      filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
-      deriva-hatrac-cli --host ${referenceBase} get \${refURL}
-      unzip \$(basename \${refURL})
-      mv \${fName}/data/* .
+    else
+      pipelineError_spike=false
+      echo -e "LOG: spike matches: Submitted=${spikeMeta}; Inferred=${spikeInfer}" >> ${repRID}.checkMetadata.log
     fi
-    echo -e "LOG: fetched" >> ${repRID}.getRef.log
 
-    mv ./annotation/genome.gtf .
-    mv ./sequence/genome.fna .
-    mv ./annotation/genome.bed .
-    mv ./metadata/Entrez.tsv .
-    mv ./metadata/geneID.tsv .
+    # create dummy output bag rid if failure
+    if [ \${pipelineError} == true ]
+    then
+      echo "fail" > outputBagRID.csv
+    fi
+
+    # write checks to file
+    echo "\${pipelineError},\${pipelineError_ends},\${pipelineError_stranded},\${pipelineError_spike},\${pipelineError_species}" > check.csv
     """
 }
 
-// Replicate reference for multiple process inputs
-reference.into {
-  reference_alignData
-  reference_countData
-  reference_dataQC
+// Split errors into separate channels and replicate them for multiple process inputs
+pipelineError = Channel.create()
+pipelineError_ends = Channel.create()
+pipelineError_stranded = Channel.create()
+pipelineError_spike = Channel.create()
+pipelineError_species = Channel.create()
+checkMetadata_fl.splitCsv(sep: ",", header: false).separate(
+  pipelineError,
+  pipelineError_ends,
+  pipelineError_stranded,
+  pipelineError_spike,
+  pipelineError_species
+)
+pipelineError.into {
+  pipelineError_dedupData
+  pipelineError_makeBigWig
+  pipelineError_countData
+  pipelineError_dataQC
+  pipelineError_aggrQC
+  pipelineError_uploadQC
+  pipelineError_uploadProcessedFile
+  pipelineError_uploadOutputBag
+  pipelineError_failExecutionRun
+  pipelineError_finalizeExecutionRun
+  pipelineError_uploadQC_fail
 }
 
 /*
- * alignData: aligns the reads to a reference database
-*/
+ * alignData: aligns the reads to the appropriate species reference
+ */
 process alignData {
   tag "${repRID}"
 
@@ -1612,27 +1721,24 @@ process alignData {
     path reference_alignData
     val ends from endsInfer_alignData
     val stranded from strandedInfer_alignData
-    val fastqCountError_alignData
-    val fastqReadError_alignData
-    val fastqFileError_alignData
-    val speciesError_alignData
-    val pipelineError_alignData
+    val fastqCountError from fastqCountError_alignData
+    val fastqReadError from fastqReadError_alignData
+    val fastqFileError from fastqFileError_alignData
+    val seqtypeError from seqtypeError_alignData
+    val speciesErrorSeqwho from speciesErrorSeqwho_alignData
+    val speciesError from speciesError_alignData
 
   output:
     tuple path ("${repRID}.sorted.bam"), path ("${repRID}.sorted.bam.bai") into rawBam
     path ("*.alignSummary.txt") into alignQC
 
   when:
-    fastqCountError_alignData == "false"
-    fastqReadError_alignData == "false"
-    fastqFileError_alignData == "false"
-    speciesError_alignData == "false"
-    pipelineError_alignData == "false"
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
 
   script:
     """
-    hostname > ${repRID}.align.log
-    ulimit -a >> ${repRID}.align.log
+    hostname > ${repRID}.alignData.log
+    ulimit -a >> ${repRID}.alignData.log
 
     # set stranded param for hisat2
     if [ "${stranded}"=="unstranded" ]
@@ -1653,7 +1759,7 @@ process alignData {
     fi
 
     # align the reads with Hisat2
-    echo -e "LOG: aligning ${ends}" >> ${repRID}.align.log
+    echo -e "LOG: aligning ${ends}" >> ${repRID}.alignData.log
     if [ "${ends}" == "se" ]
     then
       hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary
@@ -1661,44 +1767,41 @@ process alignData {
     then
       hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary
     fi
-    echo -e "LOG: alignined" >> ${repRID}.align.log
+    echo -e "LOG: alignined" >> ${repRID}.alignData.log
 
     # convert the output sam file to a sorted bam file using Samtools
-    echo -e "LOG: converting from sam to bam" >> ${repRID}.align.log
+    echo -e "LOG: converting from sam to bam" >> ${repRID}.alignData.log
     samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam
 
     # sort the bam file using Samtools
-    echo -e "LOG: sorting the bam file" >> ${repRID}.align.log
+    echo -e "LOG: sorting the bam file" >> ${repRID}.alignData.log
     proc=\$(expr `nproc` - 1)
     mem=\$(vmstat -s -S K | grep 'total memory' | grep -o '[0-9]*')
     mem=\$(expr \${mem} / \${proc} \\* 75 / 100)
     samtools sort -@ \${proc} -m \${mem}K -O BAM -o ${repRID}.sorted.bam ${repRID}.bam
 
     # index the sorted bam using Samtools
-    echo -e "LOG: indexing sorted bam file" >> ${repRID}.align.log
+    echo -e "LOG: indexing sorted bam file" >> ${repRID}.alignData.log
     samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai
     """
 }
 
-// Replicate rawBam for multiple process inputs
-rawBam.set {
-  rawBam_dedupData
-}
-
 /*
- *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates
-*/
+ * dedupData: marks duplicate reads, specifically PCR or optical duplicates
+ */
 process dedupData {
   tag "${repRID}"
-  publishDir "${outDir}/bam", mode: 'copy', pattern: "*.deduped.bam"
+  publishDir "${outDir}/bam", mode: 'copy', pattern: "*.deduped.{bam,bai}"
 
   input:
-    tuple path (bam), path (bai) from rawBam_dedupData
-    val fastqCountError_dedupData
-    val fastqReadError_dedupData
-    val fastqFileError_dedupData
-    val speciesError_dedupData
-    val pipelineError_dedupData
+    tuple path (bam), path (bai) from rawBam
+    val fastqCountError from fastqCountError_dedupData
+    val fastqReadError from fastqReadError_dedupData
+    val fastqFileError from fastqFileError_dedupData
+    val seqtypeError from seqtypeError_dedupData
+    val speciesErrorSeqwho from speciesErrorSeqwho_dedupData
+    val speciesError from speciesError_dedupData
+    val pipelineError from pipelineError_dedupData
 
   output:
     tuple path ("${repRID}_sorted.deduped.bam"), path ("${repRID}_sorted.deduped.bam.bai") into dedupBam
@@ -1706,11 +1809,7 @@ process dedupData {
     path ("*.deduped.Metrics.txt") into dedupQC
 
   when:
-    fastqCountError_dedupData == 'false'
-    fastqReadError_dedupData == 'false'
-    fastqFileError_dedupData == 'false'
-    speciesError_dedupData == 'false'
-    pipelineError_dedupData == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -1747,29 +1846,27 @@ dedupBam.into {
 }
 
 /*
- *makeBigWig: make BigWig files for output
-*/
+ * makeBigWig: makes BigWig files for output
+ */
 process makeBigWig {
   tag "${repRID}"
   publishDir "${outDir}/bigwig", mode: 'copy', pattern: "${repRID}_sorted.deduped.bw"
 
   input:
     tuple path (bam), path (bai) from dedupBam_makeBigWig
-    val fastqCountError_makeBigWig
-    val fastqReadError_makeBigWig
-    val fastqFileError_makeBigWig
-    val speciesError_makeBigWig
-    val pipelineError_makeBigWig
+    val fastqCountError from fastqCountError_makeBigWig
+    val fastqReadError from fastqReadError_makeBigWig
+    val fastqFileError from fastqFileError_makeBigWig
+    val seqtypeError from seqtypeError_makeBigWig
+    val speciesErrorSeqwho from speciesErrorSeqwho_makeBigWig
+    val speciesError from speciesError_makeBigWig
+    val pipelineError from pipelineError_makeBigWig
 
   output:
     path ("${repRID}_sorted.deduped.bw") into bigwig
 
   when:
-    fastqCountError_makeBigWig == 'false'
-    fastqReadError_makeBigWig == 'false'
-    fastqFileError_makeBigWig == 'false'
-    speciesError_makeBigWig == 'false'
-    pipelineError_makeBigWig == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -1784,8 +1881,8 @@ process makeBigWig {
 }
 
 /*
- *countData: count data and calculate tpm
-*/
+ * countData: counts data and calculates TPM
+ */
 process countData {
   tag "${repRID}"
   publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*_tpmTable.csv"
@@ -1797,11 +1894,13 @@ process countData {
     path ref from reference_countData
     val ends from endsInfer_countData
     val stranded from strandedInfer_countData
-    val fastqCountError_countData
-    val fastqReadError_countData
-    val fastqFileError_countData
-    val speciesError_countData
-    val pipelineError_countData
+    val fastqCountError from fastqCountError_countData
+    val fastqReadError from fastqReadError_countData
+    val fastqFileError from fastqFileError_countData
+    val seqtypeError from seqtypeError_countData
+    val speciesErrorSeqwho from speciesErrorSeqwho_countData
+    val speciesError from speciesError_countData
+    val pipelineError from pipelineError_countData
 
   output:
     path ("*_tpmTable.csv") into counts
@@ -1809,11 +1908,7 @@ process countData {
     path ("assignedReads.csv") into assignedReadsInfer_fl
 
   when:
-    fastqCountError_countData == 'false'
-    fastqReadError_countData == 'false'
-    fastqFileError_countData == 'false'
-    speciesError_countData == 'false'
-    pipelineError_countData == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -1860,21 +1955,19 @@ process countData {
     """
 }
 
-// Extract number of assigned reads metadata into channel
+// Extract number of assigned reads metadata into channel and replicate it for multiple process inputs
 assignedReadsInfer = Channel.create()
 assignedReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
   assignedReadsInfer
 )
-
-// Replicate inferred assigned reads for multiple process inputs
 assignedReadsInfer.into {
   assignedReadsInfer_aggrQC
   assignedReadsInfer_uploadQC
 }
 
 /*
- *dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates
-*/
+ * dataQC: calculates transcript integrity numbers (TIN) and bins them, as well as the inner distance of PE replicates
+ */
 process dataQC {
   tag "${repRID}"
 
@@ -1884,11 +1977,13 @@ process dataQC {
     tuple path (bam), path (bai) from dedupBam_dataQC
     tuple path (chrBam), path (chrBai) from dedupChrBam
     val ends from endsInfer_dataQC
-    val fastqCountError_dataQC
-    val fastqReadError_dataQC
-    val fastqFileError_dataQC
-    val speciesError_dataQC
-    val pipelineError_dataQC
+    val fastqCountError from fastqCountError_dataQC
+    val fastqReadError from fastqReadError_dataQC
+    val fastqFileError from fastqFileError_dataQC
+    val seqtypeError from seqtypeError_dataQC
+    val speciesErrorSeqwho from speciesErrorSeqwho_dataQC
+    val speciesError from speciesError_dataQC
+    val pipelineError from pipelineError_dataQC
 
   output:
     path "${repRID}_tin.hist.tsv" into tinHist
@@ -1896,11 +1991,7 @@ process dataQC {
     path "${repRID}_insertSize.inner_distance_freq.txt" into innerDistance
 
   when:
-    fastqCountError_dataQC == 'false'
-    fastqReadError_dataQC == 'false'
-    fastqFileError_dataQC == 'false'
-    speciesError_dataQC == 'false'
-    pipelineError_dataQC == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -1932,21 +2023,19 @@ process dataQC {
     """
 }
 
-// Extract median TIN metadata into channel
+// Extract median TIN metadata into channel and replicate it for multiple process inputs
 tinMedInfer = Channel.create()
 tinMedInfer_fl.splitCsv(sep: ",", header: false).separate(
   tinMedInfer
 )
-
-// Replicate inferred median TIN for multiple process inputs
 tinMedInfer.into {
   tinMedInfer_aggrQC
   tinMedInfer_uploadQC
 }
 
 /*
- *aggrQC: aggregate QC from processes as well as metadata and run MultiQC
-*/
+ * aggrQC: aggregates QC from processes as well as metadata and runs MultiQC
+ */
 process aggrQC {
   tag "${repRID}"
   publishDir "${outDir}/report", mode: 'copy', pattern: "${repRID}.multiqc.html"
@@ -1955,6 +2044,7 @@ process aggrQC {
   input:
     path multiqcConfig
     path bicfLogo
+    path seqwhoInfer
     path softwareReferences
     path softwareVersions
     path fastqc
@@ -1964,7 +2054,8 @@ process aggrQC {
     path countsQC
     path innerDistance
     path tinHist
-    path alignSampleQCs from alignSampleQC_aggrQC.collect()
+    path alignSampleQC_ERCC from alignSampleQC_ERCC
+    path alignSampleQC from alignSampleQC
     path inferExperiment
     val endsManual from endsManual_aggrQC
     val endsM from endsMeta_aggrQC
@@ -1982,22 +2073,20 @@ process aggrQC {
     val tinMedI from tinMedInfer_aggrQC
     val studyRID from studyRID_aggrQC
     val expRID from expRID_aggrQC
-    val fastqCountError_aggrQC
-    val fastqReadError_aggrQC
-    val fastqFileError_aggrQC
-    val speciesError_aggrQC
-    val pipelineError_aggrQC
+    val fastqCountError from fastqCountError_aggrQC
+    val fastqReadError from fastqReadError_aggrQC
+    val fastqFileError from fastqFileError_aggrQC
+    val seqtypeError from seqtypeError_aggrQC
+    val speciesErrorSeqwho from speciesErrorSeqwho_aggrQC
+    val speciesError from speciesError_aggrQC
+    val pipelineError from pipelineError_aggrQC
 
   output:
     path "${repRID}.multiqc.html" into multiqc
     path "${repRID}.multiqc_data.json" into multiqcJSON
 
   when:
-    fastqCountError_aggrQC == 'false'
-    fastqReadError_aggrQC == 'false'
-    fastqFileError_aggrQC == 'false'
-    speciesError_aggrQC == 'false'
-    pipelineError_aggrQC == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -2094,9 +2183,186 @@ process aggrQC {
     """
 }
 
+/* 
+ * uploadInputBag: uploads the input bag
+ */
+process uploadInputBag {
+  tag "${repRID}"
+
+  input:
+    path script_uploadInputBag
+    path credential, stageAs: "credential.json" from deriva_uploadInputBag
+    path inputBag from inputBag_uploadInputBag
+    val studyRID from studyRID_uploadInputBag
+
+  output:
+    path ("inputBagRID.csv") into inputBagRID_fl
+
+  when:
+    upload
+
+  script:
+    """
+    hostname > ${repRID}.uploadInputBag.log
+    ulimit -a >> ${repRID}.uploadInputBag.log
+
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadInputBag.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadInputBag.log
+
+    yr=\$(date +'%Y')
+    mn=\$(date +'%m')
+    dy=\$(date +'%d')
+
+    file=\$(basename -a ${inputBag})
+    md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
+    echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
+    size=\$(wc -c < ./\${file})
+    echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log
+    
+    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
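+    # ERMrest returns a JSON array; "[]" means no Input_Bag with this MD5 is registered yet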
+    if [ "\${exist}" == "[]" ]
+    then
+        cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
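+        # strip the leading '"cookie": "' (11 characters) and the trailing quote from the matched pair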
+        cookie=\${cookie:11:-1}
+
+        loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
+        inputBag_rid=\$(python3 ${script_uploadInputBag} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
+        echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
+        rid=\${inputBag_rid}
+    else
+        exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
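+        # slice off the leading '"RID":"' (7 chars) and trailing '","RCT' (6 chars) to leave the bare RID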
+        exist=\${exist:7:-6}
+        echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
+        rid=\${exist}
+    fi
+
+    echo "\${rid}" > inputBagRID.csv
+    """
+}
+
+// Extract the input bag RID into a channel and replicate it for multiple process inputs
+inputBagRID = Channel.create()
+inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
+  inputBagRID
+)
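+// DSL1 channels are consumed once, so .into{} fans the RID out to each downstream process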
+inputBagRID.into {
+  inputBagRID_uploadExecutionRun
+  inputBagRID_finalizeExecutionRun
+  inputBagRID_failPreExecutionRun
+  inputBagRID_failExecutionRun
+}
+
+/* 
+ * uploadExecutionRun: uploads the execution run
+ */
+process uploadExecutionRun {
+  tag "${repRID}"
+
+  input:
+    path script_uploadExecutionRun_uploadExecutionRun
+    path credential, stageAs: "credential.json" from deriva_uploadExecutionRun
+    val spike from spikeInfer_uploadExecutionRun
+    val species from speciesInfer_uploadExecutionRun
+    val inputBagRID from inputBagRID_uploadExecutionRun
+    val fastqCountError from fastqCountError_uploadExecutionRun
+    val fastqReadError from fastqReadError_uploadExecutionRun
+    val fastqFileError from fastqFileError_uploadExecutionRun
+    val seqtypeError from seqtypeError_uploadExecutionRun
+    val speciesErrorSeqwho from speciesErrorSeqwho_uploadExecutionRun
+    val speciesError from speciesError_uploadExecutionRun
+    
+  output:
+    path ("executionRunRID.csv") into executionRunRID_fl
+
+  when:
+    upload
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false"
+
+  script:
+    """
+    hostname > ${repRID}.uploadExecutionRun.log
+    ulimit -a >> ${repRID}.uploadExecutionRun.log
+
+    # link credential file for authentication
+    echo -e "LOG: linking deriva credentials" >> ${repRID}.uploadExecutionRun.log
+    mkdir -p ~/.deriva
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json
+    echo -e "LOG: linked" >> ${repRID}.uploadExecutionRun.log
+
+    echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
+    workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
+    workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+    workflow=\${workflow:7:-6}
+    echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.uploadExecutionRun.log
+
+    if [ "${species}" == "Homo sapiens" ]
+    then
+      genomeName=\$(echo GRCh${refHuVersion})
+    elif [ "${species}" == "Mus musculus" ]
+    then
+      genomeName=\$(echo GRCm${refMoVersion})
+    fi
+    if [ "${spike}" == "true" ]
+    then
+      genomeName=\$(echo \${genomeName}-S)
+    fi
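+    # genomeName resolves to GRCh<refHuVersion> or GRCm<refMoVersion>, with "-S" appended for spike-in runs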
+    echo LOG: searching for genome name - \${genomeName} >> ${repRID}.uploadExecutionRun.log
+    genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName})
+    genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+    genome=\${genome:7:-6}
+    echo LOG: genome RID extracted - \${genome} >> ${repRID}.uploadExecutionRun.log
+
+    cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
+    cookie=\${cookie:11:-1}
+
+    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
+    echo \${exist} >> ${repRID}.uploadExecutionRun.log
+    if [ "\${exist}" == "[]" ]
+    then
+      executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u F)
+      echo LOG: execution run RID uploaded - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
+    else
+      rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
+      rid=\${rid:7:-6}
+      echo \${rid} >> ${repRID}.uploadExecutionRun.log
+      executionRun_rid=\$(python3 ${script_uploadExecutionRun_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u \${rid})
+      echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
+    fi
+
+    echo "\${executionRun_rid}" > executionRunRID.csv
+
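+    # optionally record this run's execution RID against the Nextflow session in the external tracking service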
+    if [ ${params.track} == true ]
+    then
+      curl -H 'Content-Type: application/json' -X PUT -d \
+        '{ \
+          "ID": "${workflow.sessionId}", \
+          "ExecutionRunRID": "'\${executionRun_rid}'" \
+        }' \
+        "https://9ouc12dkwb.execute-api.us-east-2.amazonaws.com/prod/db/track"
+    fi
+    """
+}
+
+// Extract the execution run RID into a channel and replicate it for multiple process inputs
+executionRunRID = Channel.create()
+executionRunRID_fl.splitCsv(sep: ",", header: false).separate(
+  executionRunRID
+)
+executionRunRID.into {
+  executionRunRID_uploadQC
+  executionRunRID_uploadProcessedFile
+  executionRunRID_uploadOutputBag
+  executionRunRID_finalizeExecutionRun
+  executionRunRID_failExecutionRun
+  executionRunRID_fail
+}
+
 /* 
  * uploadQC: uploads the mRNA QC
-*/
+ */
 process uploadQC {
   tag "${repRID}"
 
@@ -2111,22 +2377,20 @@ process uploadQC {
     val rawCount from rawReadsInfer_uploadQC
     val finalCount from assignedReadsInfer_uploadQC
     val tinMed from tinMedInfer_uploadQC
-    val fastqCountError_uploadQC
-    val fastqReadError_uploadQC
-    val fastqFileError_uploadQC
-    val speciesError_uploadQC
-    val pipelineError_uploadQC
+    val fastqCountError from fastqCountError_uploadQC
+    val fastqReadError from fastqReadError_uploadQC
+    val fastqFileError from fastqFileError_uploadQC
+    val seqtypeError from seqtypeError_uploadQC
+    val speciesErrorSeqwho from speciesErrorSeqwho_uploadQC
+    val speciesError from speciesError_uploadQC
+    val pipelineError from pipelineError_uploadQC
 
   output:
     path ("qcRID.csv") into qcRID_fl
 
   when:
     upload
-    fastqCountError_uploadQC == 'false'
-    fastqReadError_uploadQC == 'false'
-    fastqFileError_uploadQC == 'false'
-    speciesError_uploadQC == 'false'
-    pipelineError_uploadQC == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -2170,8 +2434,8 @@ process uploadQC {
 }
 
 /*
- *uploadProcessedFile: uploads the processed files
-*/
+ * uploadProcessedFile: uploads the processed files
+ */
 process uploadProcessedFile {
   tag "${repRID}"
   publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip"
@@ -2189,25 +2453,24 @@ process uploadProcessedFile {
     val studyRID from studyRID_uploadProcessedFile
     val expRID from expRID_uploadProcessedFile
     val executionRunRID from executionRunRID_uploadProcessedFile
-    val fastqCountError_uploadProcessedFile
-    val fastqReadError_uploadProcessedFile
-    val fastqFileError_uploadProcessedFile
-    val speciesError_uploadProcessedFile
-    val pipelineError_uploadProcessedFile
+    val fastqCountError from fastqCountError_uploadProcessedFile
+    val fastqReadError from fastqReadError_uploadProcessedFile
+    val fastqFileError from fastqFileError_uploadProcessedFile
+    val seqtypeError from seqtypeError_uploadProcessedFile
+    val speciesErrorSeqwho from speciesErrorSeqwho_uploadProcessedFile
+    val speciesError from speciesError_uploadProcessedFile
+    val pipelineError from pipelineError_uploadProcessedFile
 
   output:
     path ("${repRID}_Output_Bag.zip") into outputBag
 
   when:
     upload
-    fastqCountError_uploadProcessedFile == 'false'
-    fastqReadError_uploadProcessedFile == 'false'
-    fastqFileError_uploadProcessedFile == 'false'
-    speciesError_uploadProcessedFile == 'false'
-    pipelineError_uploadProcessedFile == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
     hostname > ${repRID}.uploadProcessedFile.log
     ulimit -a >> ${repRID}.uploadProcessedFile.log
 
@@ -2279,7 +2542,7 @@ process uploadProcessedFile {
 
 /* 
  * uploadOutputBag: uploads the output bag
-*/
+ */
 process uploadOutputBag {
   tag "${repRID}"
 
@@ -2289,22 +2552,20 @@ process uploadOutputBag {
     path outputBag
     val studyRID from studyRID_uploadOutputBag
     val executionRunRID from executionRunRID_uploadOutputBag
-    val fastqCountError_uploadOutputBag
-    val fastqReadError_uploadOutputBag
-    val fastqFileError_uploadOutputBag
-    val speciesError_uploadOutputBag
-    val pipelineError_uploadOutputBag
+    val fastqCountError from fastqCountError_uploadOutputBag
+    val fastqReadError from fastqReadError_uploadOutputBag
+    val fastqFileError from fastqFileError_uploadOutputBag
+    val seqtypeError from seqtypeError_uploadOutputBag
+    val speciesErrorSeqwho from speciesErrorSeqwho_uploadOutputBag
+    val speciesError from speciesError_uploadOutputBag
+    val pipelineError from pipelineError_uploadOutputBag
 
   output:
     path ("outputBagRID.csv") into outputBagRID_fl
 
   when:
     upload
-    fastqCountError_uploadOutputBag == 'false'
-    fastqReadError_uploadOutputBag == 'false'
-    fastqFileError_uploadOutputBag == 'false'
-    speciesError_uploadOutputBag == 'false'
-    pipelineError_uploadOutputBag == 'false'
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -2363,7 +2624,7 @@ outputBagRID_fl.splitCsv(sep: ",", header: false).separate(
 
 /* 
  * finalizeExecutionRun: finalizes the execution run
-*/
+ */
 process finalizeExecutionRun {
   tag "${repRID}"
 
@@ -2373,9 +2634,17 @@ process finalizeExecutionRun {
     val executionRunRID from executionRunRID_finalizeExecutionRun
     val inputBagRID from inputBagRID_finalizeExecutionRun
     val outputBagRID
+    val fastqCountError from fastqCountError_finalizeExecutionRun
+    val fastqReadError from fastqReadError_finalizeExecutionRun
+    val fastqFileError from fastqFileError_finalizeExecutionRun
+    val seqtypeError from seqtypeError_finalizeExecutionRun
+    val speciesErrorSeqwho from speciesErrorSeqwho_finalizeExecutionRun
+    val speciesError from speciesError_finalizeExecutionRun
+    val pipelineError from pipelineError_finalizeExecutionRun
 
   when:
     upload
+    fastqCountError == "false" && fastqReadError == "false" && fastqFileError == "false" && seqtypeError == "false" && speciesErrorSeqwho == "false" && speciesError == "false" && pipelineError == "false"
 
   script:
     """
@@ -2406,16 +2675,16 @@ process finalizeExecutionRun {
 }
 
 // Combine errors
-error_meta = fastqCountError_uploadQC_fail.ifEmpty(false).combine(fastqReadError_uploadQC_fail.ifEmpty(false).combine(fastqFileError_uploadQC_fail.ifEmpty(false).combine(speciesError_uploadQC_fail.ifEmpty(false).combine(pipelineError_uploadQC_fail.ifEmpty(false)))))
-error_meta. into{
+error_meta = fastqCountError_uploadQC_fail.ifEmpty(false).combine(fastqReadError_uploadQC_fail.ifEmpty(false).combine(fastqFileError_uploadQC_fail.ifEmpty(false).combine(seqtypeError_uploadQC_fail.ifEmpty(false).combine(speciesErrorSeqwho_uploadQC_fail.ifEmpty(false).combine(speciesError_uploadQC_fail.ifEmpty(false).combine(pipelineError_uploadQC_fail.ifEmpty(false)))))))
+error_meta.into {
   error_failPreExecutionRun
   error_uploadQC_fail
 }
-errorDetails = fastqCountError_details.ifEmpty("").combine(fastqReadError_details.ifEmpty("").combine(fastqFileError_details.ifEmpty("").combine(speciesError_details.ifEmpty(""))))
+errorDetails = fastqCountError_details.ifEmpty("").combine(fastqReadError_details.ifEmpty("").combine(fastqFileError_details.ifEmpty("").combine(seqtypeError_details.ifEmpty("").combine(speciesErrorSeqwho_details.ifEmpty("")))))
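+// NOTE: the combine() order of both tuples must match the 'tuple val (...)' unpacking order in failPreExecutionRun and uploadQC_fail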
 
 /* 
- * failPreExecutionRun_fastq: fail the execution run prematurely for fastq errors
-*/
+ * failPreExecutionRun: fail the execution run prematurely for fastq errors
+ */
 process failPreExecutionRun {
   tag "${repRID}"
 
@@ -2425,15 +2694,15 @@ process failPreExecutionRun {
     val spike from spikeMeta_failPreExecutionRun
     val species from speciesMeta_failPreExecutionRun
     val inputBagRID from inputBagRID_failPreExecutionRun
-    tuple val (fastqCountError), val (fastqReadError), val (fastqFileError), val (speciesError), val (pipelineError) from error_failPreExecutionRun
-    tuple val (fastqCountError_details), val (fastqReadError_details), val (fastqFileError_details), val (speciesError_details) from errorDetails
+    tuple val (fastqCountError), val (fastqReadError), val (fastqFileError), val (seqtypeError), val (speciesErrorSeqwho), val (speciesError), val (pipelineError) from error_failPreExecutionRun
+    tuple val (fastqCountError_details), val (fastqReadError_details), val (fastqFileError_details), val (seqtypeError_details), val (speciesError_details) from errorDetails
 
   output:
     path ("executionRunRID.csv") into executionRunRID_preFail_fl
 
   when:
     upload
-    fastqCountError == 'true' || fastqReadError == 'true' || fastqFileError == 'true' || speciesError == 'true'
+    fastqCountError == "true" || fastqReadError == "true" || fastqFileError == "true" || seqtypeError == "true" || speciesError == "true"
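+    // pipelineError is excluded here: post-alignment failures are finalized by failExecutionRun instead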
 
   script:
     """
@@ -2450,6 +2719,9 @@ process failPreExecutionRun {
     elif [ ${fastqFileError} == true ]
     then
       errorDetails=\$(echo \$(errorDetails)${fastqFileError_details}"\\n")
+    elif [ ${seqtypeError} == true ]
+    then
+      errorDetails=\$(echo \${errorDetails}${seqtypeError_details}"\\n")
     elif [ ${speciesError} == true ]
     then
       errorDetails=\$(echo \$(errorDetails)${speciesError_details}"\\n")
@@ -2520,7 +2792,7 @@ failExecutionRunRID = executionRunRID_fail.ifEmpty('').mix(executionRunRID_preFa
 
 /* 
  * failExecutionRun: fail the execution run
-*/
+ */
 process failExecutionRun {
   tag "${repRID}"
 
@@ -2546,7 +2818,7 @@ process failExecutionRun {
 
   when:
     upload
-    pipelineError == 'true'
+    pipelineError == "true"
 
   script:
     """
@@ -2615,7 +2887,7 @@ process failExecutionRun {
 
 /* 
  * uploadQC_fail: uploads the mRNA QC on failed execution run
-*/
+ */
 process uploadQC_fail {
   tag "${repRID}"
 
@@ -2624,11 +2896,11 @@ process uploadQC_fail {
     path script_uploadQC_fail
     path credential, stageAs: "credential.json" from deriva_uploadQC_fail
     val executionRunRID from failExecutionRunRID
-    tuple val (fastqCountError), val (fastqReadError), val (fastqFileError), val (speciesError), val (pipelineError) from error_uploadQC_fail
+    tuple val (fastqCountError), val (fastqReadError), val (fastqFileError), val (seqtypeError), val (speciesErrorSeqwho), val (speciesError), val (pipelineError) from error_uploadQC_fail
 
   when:
     upload
-    fastqCountError == 'true' || fastqReadError == 'true' || fastqFileError == 'true' || speciesError == 'true' || pipelineError == 'true'
+    fastqCountError == "true" || fastqReadError == "true" || fastqFileError == "true" || seqtypeError == "true" || speciesErrorSeqwho == "true" || speciesError == "true" || pipelineError == "true"
 
   script:
     """
@@ -2663,7 +2935,6 @@ process uploadQC_fail {
     """
 }
 
-
 workflow.onError = {
   subject = "$workflow.manifest.name FAILED: $params.repRID"
 
diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml
index ed1375aed47a454394029e5057695b0c15babd8c..89059e01682adfcd9354e3c7d78b6a65a87bf569 100644
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -27,17 +27,17 @@ top_modules:
   - picard:
       name: 'Dedup'
       info: 'Replicate Alignment Deduplication QC Results'
+  - featureCounts:
+      name: 'Count'
+      info: 'Replicate Feature Count QC Results'
   - rseqc:
       name: 'Inner Distance'
       info: 'Replicate Paired End Inner Distance Distribution Results'
       path_filters:
         - '*insertSize*'
   - custom_content
-  - featureCounts:
-      name: 'Count'
-      info: 'Replicate Feature Count QC Results'
   - hisat2:
-      name: 'Inference: Align'
+      name: 'Inference: Spike-in'
       info: 'Inference Alignment (1M downsampled reads) QC Results'
       path_filters:
         - '*alignSampleSummary*'
@@ -131,8 +131,8 @@ custom_data:
 
     ref:
         file_format: 'tsv'
-        section_name: 'Reference'
-        description: 'This is the reference version information'
+        section_name: 'Genome Reference'
+        description: 'This is the genome reference version information'
         plot_type: 'table'
         pconfig:
             id: 'ref'
@@ -166,6 +166,28 @@ custom_data:
             71 - 80
             81 - 90
             91 - 100
+    seqwho:
+        file_format: 'tsv'
+        section_name: 'Inference: Sequencing Type & Species'
+        description: 'These are the inference results from the SeqWho tool'
+        plot_type: 'table'
+        pconfig:
+            id: 'seqwho'
+            scale: false
+            format: '{}'
+        headers:
+            Read:
+                description: 'Sequencing read (R1/R2)'
+            Seq Type:
+                description: 'Inferred Sequence Type'
+            Species:
+                description: 'Inferred Species'
+            Seq Type Confidence:
+                description: 'Sequence type call confidence'
+            Seq Type Consensus:
+                description: 'Consensus of repeated subsampling inference for low-confidence sequence type calls'
+            Species Confidence:
+                description: 'Species call confidence'
 
 sp:
     run:
@@ -178,3 +200,5 @@ sp:
         fn: 'reference.tsv'
     tin:
         fn: '*_tin.hist.tsv'
+    seqwho:
+        fn: 'seqwhoInfer.tsv'
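+        # seqwhoInfer.tsv is staged into the aggrQC workdir and feeds the 'seqwho' custom_data section above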
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
deleted file mode 120000
index 2984ceedc04dab65543aa5707372e679b4d7653a..0000000000000000000000000000000000000000
--- a/workflow/nextflow.config
+++ /dev/null
@@ -1 +0,0 @@
-../nextflow.config
\ No newline at end of file
diff --git a/workflow/nextflowConf b/workflow/nextflowConf
deleted file mode 120000
index 8e5c4cf73c241a65e5274d161bba48c582b1c9d3..0000000000000000000000000000000000000000
--- a/workflow/nextflowConf
+++ /dev/null
@@ -1 +0,0 @@
-../nextflowConf/
\ No newline at end of file
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
deleted file mode 120000
index e5aa8d1b5b16a35ab26e961a8e4cc5009d67aef5..0000000000000000000000000000000000000000
--- a/workflow/rna-seq.nf
+++ /dev/null
@@ -1 +0,0 @@
-../rna-seq.nf
\ No newline at end of file
diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py
index 09447d17a62a439a418753398e1cd77716ceaa74..2ff498659cc9acbf989ec45e61e8b755b9cc3a66 100644
--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -34,17 +34,19 @@ SOFTWARE_REGEX = {
     'Python': ['version_python.txt', r"Python (\S+)"],
     'DERIVA': ['version_deriva.txt', r"(\S+)"],
     'BDBag': ['version_bdbag.txt', r"BDBag (\S+) \(Bagit \S+\)"],
-    'RSeQC': ['version_rseqc.txt', r"infer_experiment.py (\S+)"],
     'Trim Galore!': ['version_trimgalore.txt', r"version (\S+)"],
     'HISAT2': ['version_hisat2.txt', r"version (\S+)"],
     'Samtools': ['version_samtools.txt', r"samtools (\S+)"],
     'picard (MarkDuplicates)': ['version_markdups.txt', r"Version:(\S+)"],
     'featureCounts': ['version_featurecounts.txt', r"featureCounts v(\S+)"],
-    'R': ['version_r.txt', r"R version (\S+)"],
     'deepTools': ['version_deeptools.txt', r"deeptools (\S+)"],
+    'Seqtk': ['version_seqtk.txt', r"Version: (\S+)"],
+    'R': ['version_r.txt', r"R version (\S+)"],
     'FastQC': ['version_fastqc.txt', r"FastQC v(\S+)"],
+    'SeqWho': ['version_seqwho.txt', r"Version (\S+)"],
+    'RSeQC': ['version_rseqc.txt', r"infer_experiment.py (\S+)"],
     'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
-    'Pipeline Version': ['./workflow/nextflow.config', r"version = 'v(\S+)'"]
+    'Pipeline Version': ['./nextflow.config', r"version = 'v(\S+)'"]
 }
 
 
@@ -93,15 +95,17 @@ def main():
     results['Python'] = '<span style="color:#999999;\">Not Run</span>'
     results['DERIVA'] = '<span style="color:#999999;\">Not Run</span>'
     results['BDBag'] = '<span style="color:#999999;\">Not Run</span>'
-    results['RSeQC'] = '<span style="color:#999999;\">Not Run</span>'
     results['Trim Galore!'] = '<span style="color:#999999;\">Not Run</span>'
     results['HISAT2'] = '<span style="color:#999999;\">Not Run</span>'
     results['Samtools'] = '<span style="color:#999999;\">Not Run</span>'
     results['picard (MarkDuplicates)'] = '<span style="color:#999999;\">Not Run</span>'
     results['featureCounts'] = '<span style="color:#999999;\">Not Run</span>'
-    results['R'] = '<span style="color:#999999;\">Not Run</span>'
     results['deepTools'] = '<span style="color:#999999;\">Not Run</span>'
+    results['Seqtk'] = '<span style="color:#999999;\">Not Run</span>'
+    results['R'] = '<span style="color:#999999;\">Not Run</span>'
     results['FastQC'] = '<span style="color:#999999;\">Not Run</span>'
+    results['SeqWho'] = '<span style="color:#999999;\">Not Run</span>'
+    results['RSeQC'] = '<span style="color:#999999;\">Not Run</span>'
     results['MultiQC'] = '<span style="color:#999999;\">Not Run</span>'
     results['Pipeline Version'] = '<span style="color:#999999;\">Not Run</span>'
 
@@ -125,7 +129,7 @@ def main():
         '''
         id: 'software_versions'
         section_name: 'Software Versions'
-        section_href: 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/blob/78-tool_version/docs/RNA-Seq%20Pipeline%20Design%20Process%20Table.pdf'
+        section_href: 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/wikis/Pipeline/Tool-Versions'
         plot_type: 'html'
         description: 'are collected for pipeline version.'
         data: |
diff --git a/workflow/scripts/get_updated_badge_info.sh b/workflow/scripts/get_updated_badge_info.sh
index 4b929272f2ea80ede5d47b84cd55bad2c6a3fa7b..19098f57fa0fd1b4455d8c8f52de308d7dd5ae6d 100644
--- a/workflow/scripts/get_updated_badge_info.sh
+++ b/workflow/scripts/get_updated_badge_info.sh
@@ -2,26 +2,28 @@
 
 echo "collecting stats for badges"
 latest_release_tag=$(git tag --sort=-committerdate -l *.*.* | head -1)
-current_pipeline_version=$(git show ${latest_release_tag}:workflow/nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
-current_nextflow_version=$(git show ${latest_release_tag}:workflow/nextflow.config | grep -o nextflowVersion.* | grep -oP "(?<=').*(?=')")
-master_pipeline_version=$(git show origin/master:workflow/nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
-master_nextflow_version=$(git show origin/master:workflow/nextflow.config | grep -o nextflowVersion.* | grep -oP "(?<=').*(?=')")
-develop_pipeline_version=$(git show origin/develop:workflow/nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
-develop_nextflow_version=$(git show origin/develop:workflow/nextflow.config | grep -o nextflowVersion.* | grep -oP "(?<=').*(?=')")
+current_pipeline_version=$(git show ${latest_release_tag}:nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
+current_nextflow_version=$(git show ${latest_release_tag}:nextflow.config | grep -o nextflowVersion.* | grep -oP "(?<=').*(?=')")
+master_pipeline_version=$(git show origin/master:nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
+master_nextflow_version=$(git show origin/master:nextflow.config | grep -o nextflowVersion.* | grep -oP "(?<=').*(?=')")
+develop_pipeline_version=$(git show origin/develop:nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
+develop_nextflow_version=$(git show origin/develop:nextflow.config | grep -o nextflowVersion.* | grep -oP "(?<=').*(?=')")
 
 echo "collecting tool version for badges"
 python_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o Python.* | grep -oP "(?<=d>).*(?=\<)")
 deriva_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o DERIVA.* | grep -oP "(?<=d>).*(?=\<)")
 bdbag_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o BDBag.* | grep -oP "(?<=d>).*(?=\<)")
-rseqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o RSeQC.* | grep -oP "(?<=d>).*(?=\<)")
 trimgalore_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o 'Trim Galore!'.* | grep -oP "(?<=d>).*(?=\<)")
 hisat2_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o HISAT2.* | grep -oP "(?<=d>).*(?=\<)")
 samtools_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o Samtools.* | grep -oP "(?<=d>).*(?=\<)")
 picard_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o 'picard (MarkDuplicates)'.* | grep -oP "(?<=d>).*(?=\<)")
 featurecounts_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o featureCounts.* | grep -oP "(?<=d>).*(?=\<)")
-r_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o '>R<'.* | grep -oP "(?<=d>).*(?=\<)")
 deeptools_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o deepTools.* | grep -oP "(?<=d>).*(?=\<)")
+seqtk_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o Seqtk.* | grep -oP "(?<=d>).*(?=\<)")
+r_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o '>R<'.* | grep -oP "(?<=d>).*(?=\<)")
 fastqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o FastQC.* | grep -oP "(?<=d>).*(?=\<)")
+seqwho_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o SeqWho.* | grep -oP "(?<=d>).*(?=\<)")
+rseqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o RSeQC.* | grep -oP "(?<=d>).*(?=\<)")
 multiqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o MultiQC.* | grep -oP "(?<=d>).*(?=\<)")
 
 echo "collecting badges"
@@ -37,13 +39,15 @@ curl --request GET https://img.shields.io/badge/Nextflow%20Version-${develop_nex
 curl --request GET https://img.shields.io/badge/Python%20Version-${python_version}-blueviolet?style=flat > ./badges/tools/python.svg
 curl --request GET https://img.shields.io/badge/DERIVA%20Version-${deriva_version}-blueviolet?style=flat > ./badges/tools/deriva.svg
 curl --request GET https://img.shields.io/badge/BDBag%20Version-${bdbag_version}-blueviolet?style=flat > ./badges/tools/bdbag.svg
-curl --request GET https://img.shields.io/badge/RSeQC%20Version-${rseqc_version}-blueviolet?style=flat > ./badges/tools/rseqc.svg
 curl --request GET https://img.shields.io/badge/Trim%20Galore%20Version-${trimgalore_version}-blueviolet?style=flat > ./badges/tools/trimgalore.svg
 curl --request GET https://img.shields.io/badge/HISAT2%20Version-${hisat2_version}-blueviolet?style=flat > ./badges/tools/hisat2.svg
 curl --request GET https://img.shields.io/badge/Samtools%20Version-${samtools_version}-blueviolet?style=flat > ./badges/tools/samtools.svg
 curl --request GET https://img.shields.io/badge/picard%20Version-${picard_version}-blueviolet?style=flat > ./badges/tools/picard.svg
 curl --request GET https://img.shields.io/badge/featureCounts%20Version-${featurecounts_version}-blueviolet?style=flat > ./badges/tools/featurecounts.svg
-curl --request GET https://img.shields.io/badge/R%20Version-${r_version}-blueviolet?style=flat > ./badges/tools/r.svg
 curl --request GET https://img.shields.io/badge/deepTools%20Version-${deeptools_version}-blueviolet?style=flat > ./badges/tools/deeptools.svg
+curl --request GET https://img.shields.io/badge/Seqtk%20Version-${seqtk_version}-blueviolet?style=flat > ./badges/tools/seqtk.svg
+curl --request GET https://img.shields.io/badge/R%20Version-${r_version}-blueviolet?style=flat > ./badges/tools/r.svg
 curl --request GET https://img.shields.io/badge/FastQC%20Version-${fastqc_version}-blueviolet?style=flat > ./badges/tools/fastqc.svg
+curl --request GET https://img.shields.io/badge/SeqWho%20Version-${seqwho_version}-blueviolet?style=flat > ./badges/tools/seqwho.svg
+curl --request GET https://img.shields.io/badge/RSeQC%20Version-${rseqc_version}-blueviolet?style=flat > ./badges/tools/rseqc.svg
 curl --request GET https://img.shields.io/badge/MultiQC%20Version-${multiqc_version}-blueviolet?style=flat > ./badges/tools/multiqc.svg
\ No newline at end of file
diff --git a/workflow/scripts/get_updated_rep_count.sh b/workflow/scripts/get_updated_rep_count.sh
index daeb0575d08f2126b40f2db089ae82af4f01ed0c..592da9d99e8cf5d31511f92febaeea8f966e1f08 100644
--- a/workflow/scripts/get_updated_rep_count.sh
+++ b/workflow/scripts/get_updated_rep_count.sh
@@ -2,7 +2,7 @@
 
 echo "collecting stats for badges"
 latest_release_tag=$(git tag --sort=-committerdate -l *.*.* | head -1)
-current_pipeline_version=$(git show ${latest_release_tag}:workflow/nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
+current_pipeline_version=$(git show ${latest_release_tag}:nextflow.config | grep -o version.* | grep -oP "(?<=').*(?=')")
 current_pipeline_versionMajor=$(echo ${current_pipeline_version} | cut -f1 -d".")
 current_pipeline_versionMajor=$(echo ${current_pipeline_versionMajor}".")
 echo "Major pipeline version for search: "${current_pipeline_versionMajor}
diff --git a/workflow/tests/test_seqwho.py b/workflow/tests/test_seqwho.py
new file mode 100644
index 0000000000000000000000000000000000000000..051cc4b379bc2378b2effff22f4737592d9b54cd
--- /dev/null
+++ b/workflow/tests/test_seqwho.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import pytest
+import os
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+    '/../../'
+
+
+@pytest.mark.seqwho
+def test_seqwho():
+    assert os.path.exists(os.path.join(
+        test_output_path, 'SeqWho_call.tsv'))
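+
+# example invocation using pytest marker selection: pytest -m seqwho workflow/tests/test_seqwho.py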