Commit ad5bf09e authored by Gervaise Henry

Merge branch '11-deriva.upload' into 'develop'

Resolve "process_derivaUpload"

Closes #24, #75, and #11

See merge request !53
parents 30143e2f 5e6b9051
Part of 2 merge requests: !58 Develop, !53 Resolve "process_derivaUpload"
Pipeline #8733 failed with stages in 8 minutes and 17 seconds
Showing changes with 1035 additions and 265 deletions
......@@ -7,6 +7,11 @@ before_script:
- mkdir -p ~/.deriva
- mkdir -p ~/.bdbag
variables:
refMoVersion: "38.p6.vM22"
refHuVersion: "38.p12.v31"
refERCCVersion: "92"
stages:
- badges
- deploy
......@@ -47,8 +52,8 @@ getBag:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli --version > version_deriva.txt
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
- singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-download-cli --version > version_deriva.txt
- singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-download-cli staging.gudmap.org --catalog 2 ./workflow/conf/Replicate_For_Input_Bag.json . rid=Q-Y5F6
- pytest -m getBag
artifacts:
name: "$CI_JOB_NAME"
......@@ -65,10 +70,10 @@ getData:
except:
- merge_requests
script:
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag --version > version_bdbag.txt
- singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bdbag --version > version_bdbag.txt
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- unzip ./test_data/bag/staging/Replicate_Q-Y5F6.zip
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST
- unzip ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip
- singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bash ./workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6 TEST
- pytest -m getData
artifacts:
name: "$CI_JOB_NAME"
......@@ -85,16 +90,16 @@ parseMetadata:
except:
- merge_requests
script:
- singularity run 'docker://bicf/python3:2.0.1_indev' python3 --version > version_python.txt
- rep=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
- exp=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
- study=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
- endsMeta=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
- endsManual=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
- stranded=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
- spike=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
- species=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
- readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
- singularity run 'docker://gudmaprbk/python3:1.0.0' python3 --version > version_python.txt
- rep=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
- exp=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
- study=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
- endsMeta=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
- endsManual=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
- stranded=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
- spike=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
- species=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
- readLength=$(singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p readLength)
- echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
- pytest -m parseMetadata
artifacts:
......@@ -112,13 +117,13 @@ inferMetadata:
except:
- merge_requests
script:
- singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py --version > version_rseqc.txt
- singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' infer_experiment.py --version > version_rseqc.txt
- >
align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
if [[ ${align} == "" ]]; then exit 1; fi
- >
singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
ended=`singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/inferMeta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
ended=`singularity run 'docker://gudmaprbk/python3:1.0.0' python3 ./workflow/scripts/infer_meta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
if [[ ${ended} == "" ]]; then exit 1; fi
- pytest -m inferMetadata
artifacts:
......@@ -136,9 +141,9 @@ trimData:
except:
- merge_requests
script:
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --version > version_trimgalore.txt
- singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- pytest -m trimData
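The two awk one-liners above compute the median trimmed read length. A self-contained sketch of the same idiom on a toy FASTQ stream (the toy reads are illustrative):

```bash
# Median-length idiom from the readLengthSE/readLengthPE lines above.
# FASTQ records are 4 lines; NR%4==2 selects the sequence line.
printf '@r1\nACGT\n+\nIIII\n@r2\nACGTAC\n+\nIIIIII\n@r3\nACG\n+\nIII\n' |
awk '{if(NR%4==2) print length($1)}' | sort -n |
awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}'
# lengths 4,6,3 -> sorted 3,4,6 -> prints the median, 4
```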
......@@ -157,10 +162,9 @@ downsampleData:
except:
- merge_requests
script:
- singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
- singularity run 'docker://gudmaprbk/seqtk1.3:1.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
- pytest -m downsampleData
alignData:
stage: unit
only:
......@@ -169,16 +173,16 @@ alignData:
except:
- merge_requests
script:
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 --version > version_hisat2.txt
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools --version > version_samtools.txt
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 --version > version_hisat2.txt
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools --version > version_samtools.txt
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
- singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
- pytest -m alignData
artifacts:
name: "$CI_JOB_NAME"
......@@ -188,7 +192,6 @@ alignData:
- version_samtools.txt
expire_in: 7 days
dedupData:
stage: unit
only:
......@@ -197,15 +200,15 @@ dedupData:
except:
- merge_requests
script:
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools --version > version_samtools.txt
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates --version 2> version_markdups.txt&
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
- singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools --version > version_samtools.txt
- singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates --version 2> version_markdups.txt&
- singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
- singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
- singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
- >
for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
done | singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' parallel -j 20 -k
- pytest -m dedupData
artifacts:
name: "$CI_JOB_NAME"
......@@ -225,12 +228,12 @@ countData:
script:
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
- singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se_countData
- singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
- assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -v &> version_featurecounts.txt
- singularity run 'docker://bicf/subread2:2.0.0' R --version > version_r.txt
- singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -v &> version_featurecounts.txt
- singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' R --version > version_r.txt
- pytest -m makeFeatureCounts
artifacts:
name: "$CI_JOB_NAME"
......@@ -248,8 +251,8 @@ makeBigWig:
except:
- merge_requests
script:
- singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' deeptools --version > version_deeptools.txt
- singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
- singularity run 'docker://gudmaprbk/deeptools3.5.0:1.0.0' deeptools --version > version_deeptools.txt
- singularity run 'docker://gudmaprbk/deeptools3.5.0:1.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
- pytest -m makeBigWig
artifacts:
name: "$CI_JOB_NAME"
......@@ -266,8 +269,8 @@ fastqc:
except:
- merge_requests
script:
- singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc --version > version_fastqc.txt
- singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
- singularity run 'docker://gudmaprbk/fastqc0.11.9:1.0.0' fastqc --version > version_fastqc.txt
- singularity run 'docker://gudmaprbk/fastqc0.11.9:1.0.0' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
- pytest -m fastqc
artifacts:
name: "$CI_JOB_NAME"
......@@ -286,11 +289,85 @@ dataQC:
- merge_requests
script:
- echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
- for i in {"chr8","chr4","chrY"}; do
echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
- >
for i in {"chr8","chr4","chrY"}; do
echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"
done | singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
- pytest -m dataQC
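Both dedupData and dataQC use the same idiom: a for loop echoes one command string per chromosome, and the strings are piped to GNU parallel (`-j 20` workers, `-k` to keep output order). A minimal sketch of the pattern outside the container:

```bash
# One command string per chromosome, executed 20-wide, output kept in
# submission order (requires GNU parallel).
for i in {"chr8","chr4","chrY"}; do
  echo "echo processing ${i};"
done | parallel -j 20 -k
```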
outputBag:
uploadInputBag:
stage: unit
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
- echo THIS IS A TEST FILE > test.txt
- >
md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
size=$(wc -c < ./test.txt) &&
exist=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=${md5}) &&
if [ "${exist}" == "[]" ]; then
cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
cookie=${cookie:11:-1} &&
loc=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/input_bag/TEST/test.txt --parents) &&
rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_input_bag.py -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test input bag' -o staging.gudmap.org -c ${cookie}) &&
echo ${rid} test input bag created
else
rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
rid=${rid:8:-6} &&
echo ${rid} test input bag already exists
fi
uploadExecutionRun:
stage: unit
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
- >
exist=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Replicate=17-BTFJ) &&
cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
cookie=${cookie:11:-1} &&
if [ "${exist}" == "[]" ]; then
rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BTFM -g 17-BT50 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u F) &&
echo ${rid} test execution run created
else
rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
rid=${rid:7:-6} &&
rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BTFM -g 17-BT50 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u ${rid}) &&
echo ${rid} test execution run already exists
fi
uploadQC:
stage: unit
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
- >
exist=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=17-BTFJ) &&
cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
cookie=${cookie:11:-1} &&
if [ "${exist}" != "[]" ]; then
rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
for rid in ${rids}; do
singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/delete_entry.py -r ${rid} -t mRNA_QC -o staging.gudmap.org -c ${cookie}
done
echo all old mRNA QC RIDs deleted
fi
rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BTG4 -p "Single Read" -s forward -l 35 -w 5 -f 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
echo ${rid} test mRNA QC created
uploadProcessedFile:
stage: unit
only:
- push
......@@ -298,10 +375,54 @@ outputBag:
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
- echo THIS IS A TEST FILE > 17-BTFJ_test.csv
- mkdir -p ./deriva/Seq/pipeline/17-BTFE/17-BTG4/
- mv 17-BTFJ_test.csv ./deriva/Seq/pipeline/17-BTFE/17-BTG4/17-BTFJ_test.csv
- >
exist=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=17-BTFJ) &&
cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
cookie=${cookie:11:-1} &&
if [ "${exist}" != "[]" ]; then
rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
for rid in ${rids}; do
singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/delete_entry.py -r ${rid} -t Processed_File -o staging.gudmap.org -c ${cookie}
done
echo all old processed file RIDs deleted
fi
singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-upload-cli --catalog 2 --token ${cookie:9} staging.gudmap.org ./deriva
echo test processed file uploaded
- mkdir test
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag test --archiver zip
- singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bdbag test --archiver zip
- echo test output bag created
- pytest -m outputBag
uploadOutputBag:
stage: unit
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
- echo THIS IS A TEST FILE > test.txt
- >
md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
size=$(wc -c < ./test.txt) &&
exist=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_MD5=${md5}) &&
if [ "${exist}" == "[]" ]; then
cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
cookie=${cookie:11:-1} &&
loc=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/output_bag/TEST/test.txt --parents) &&
rid=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/upload_output_bag.py -e 17-BTG4 -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test output bag' -o staging.gudmap.org -c ${cookie}) &&
echo ${rid} test output bag created
else
rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
rid=${rid:8:-6} &&
echo ${rid} test output bag already exists
fi
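The five upload jobs above share one check-then-create pattern against the data-hub: query ERMrest for an existing record, and only stage the file in hatrac and register a new entry when the query returns an empty list. A minimal bash sketch of that flow, assuming the cookie has already been extracted from credential.json as in the jobs above (host, hatrac path, and note string are illustrative; the CLI calls mirror the jobs):

```bash
# Check-then-create sketch: look up the file by MD5, upload only if absent.
md5=$(md5sum ./test.txt | awk '{ print $1 }')
size=$(wc -c < ./test.txt)
exist=$(curl -s "https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=${md5}")
if [ "${exist}" == "[]" ]; then
  # No record yet: stage the file in hatrac, then register the entry.
  loc=$(deriva-hatrac-cli --host staging.gudmap.org put ./test.txt \
    /hatrac/resources/rnaseq/pipeline/input_bag/TEST/test.txt --parents)
  rid=$(python3 ./workflow/scripts/upload_input_bag.py -f test.txt -l ${loc} \
    -s ${md5} -b ${size} -n 'This is a test input bag' \
    -o staging.gudmap.org -c ${cookie})
else
  # Record exists: recover its RID from the JSON response.
  rid=$(echo ${exist} | grep -o '"RID":"[^"]*"' | head -1 | cut -d '"' -f4)
fi
```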
generateVersions:
stage: aggregation
......@@ -311,7 +432,7 @@ generateVersions:
except:
- merge_requests
script:
- singularity run 'docker://bicf/multiqc1.8:2.0.1_indev' multiqc --version > version_multiqc.txt
- singularity run 'docker://gudmaprbk/multiqc1.9:1.0.0' multiqc --version > version_multiqc.txt
- python ./workflow/scripts/generate_versions.py -o software_versions
- python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
artifacts:
......@@ -323,7 +444,7 @@ generateVersions:
expire_in: 7 days
humanBioHPC:
human_BioHPC:
stage: reference
only:
- push
......@@ -334,7 +455,7 @@ humanBioHPC:
- mkdir -p hu
- cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./hu/
mouseBioHPC:
mouse_BioHPC:
stage: reference
only:
- push
......@@ -345,7 +466,7 @@ mouseBioHPC:
- mkdir -p mo
- cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./mo/
humanDataHub:
human_dev:
stage: reference
only:
- push
......@@ -356,14 +477,13 @@ humanDataHub:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- referenceBase=dev.gudmap.org
- refName=GRCh
- refHuVersion=38.p12.v31
- references=$(echo ${referenceBase}/${refName}${refHuVersion})
- GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
- GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
- GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
- query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
- curl --request GET ${query} > refQuery.json
- refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL)
- refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
- loc=$(dirname ${refURL})
- if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
- filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
......@@ -371,7 +491,7 @@ humanDataHub:
- test=$(echo ${test} | grep -o ${filename})
- if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
mousenDataHub:
mouse_dev:
stage: reference
only:
- push
......@@ -382,14 +502,115 @@ mousenDataHub:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- referenceBase=dev.gudmap.org
- refName=GRCm
- references=$(echo ${referenceBase}/${refName}${refMoVersion})
- GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
- GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
- GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
- query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
- curl --request GET ${query} > refQuery.json
- refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
- loc=$(dirname ${refURL})
- if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
- filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
- test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
- test=$(echo ${test} | grep -o ${filename})
- if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
human_staging:
stage: reference
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- referenceBase=staging.gudmap.org
- refName=GRCh
- references=$(echo ${referenceBase}/${refName}${refHuVersion})
- GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
- GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
- GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
- query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
- curl --request GET ${query} > refQuery.json
- refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
- loc=$(dirname ${refURL})
- if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
- filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
- test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
- test=$(echo ${test} | grep -o ${filename})
- if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
mouse_staging:
stage: reference
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- referenceBase=staging.gudmap.org
- refName=GRCm
- references=$(echo ${referenceBase}/${refName}${refMoVersion})
- GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
- GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
- GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
- query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
- curl --request GET ${query} > refQuery.json
- refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
- loc=$(dirname ${refURL})
- if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
- filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
- test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
- test=$(echo ${test} | grep -o ${filename})
- if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
human_prod:
stage: reference
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- referenceBase=www.gudmap.org
- refName=GRCh
- references=$(echo ${referenceBase}/${refName}${refHuVersion})
- GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
- GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
- GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
- query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
- curl --request GET ${query} > refQuery.json
- refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL)
- refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
- loc=$(dirname ${refURL})
- if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
- filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
- test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
- test=$(echo ${test} | grep -o ${filename})
- if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi
mouse_prod:
stage: reference
only:
- push
- tags
except:
- merge_requests
script:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- referenceBase=www.gudmap.org
- refName=GRCm
- references=$(echo ${referenceBase}/${refName}${refMoVersion})
- GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
- GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
- GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
- query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
- curl --request GET ${query} > refQuery.json
- refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
- loc=$(dirname ${refURL})
- if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
- filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
......@@ -418,7 +639,7 @@ integration_se:
- SE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -443,10 +664,11 @@ integration_pe:
- PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
override_inputBag:
stage: integration
only: [merge_requests]
......@@ -456,7 +678,7 @@ override_inputBag:
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/staging/Replicate_Q-Y5F6.zip --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -465,7 +687,7 @@ override_inputBag:
- inputBagOverride_PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -478,7 +700,7 @@ override_fastq:
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -487,7 +709,7 @@ override_fastq:
- fastqOverride_PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......@@ -500,7 +722,7 @@ override_species:
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --upload false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
......@@ -509,7 +731,7 @@ override_species:
- speciesOverride_PE_multiqc_data.json
expire_in: 7 days
retry:
max: 1
max: 0
when:
- always
......
......@@ -5,9 +5,9 @@ These are the most common things requested on pull requests.
- [ ] This comment contains a description of changes (with reason)
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated
- [ ] Replace dag.png with the most recent CI pipleine integrated_pe artifact
- [ ] Replace software_versions_mqc.yaml with the most recent CI pipleine generateVersions artifact
- [ ] Replace software_references_mqc.yaml with the most recent CI pipleine generateVersions artifact
- [ ] Replace dag.png with the most recent CI pipeline integrated_pe artifact
- [ ] Replace software_versions_mqc.yaml with the most recent CI pipeline generateVersions artifact
- [ ] Replace software_references_mqc.yaml with the most recent CI pipeline generateVersions artifact
- [ ] `CHANGELOG.md` is updated
- [ ] `README.md` is updated
- [ ] `LICENSE.md` is updated with new contributors
......
# v0.0.4 (in development)
# v0.1.0 (in development)
**User Facing**
* Add option to pull references from datahub
* Add option to send email on workflow error, with pipeline error message
* Add versions and paper references of software used to report
* Upload input bag
* Upload execution run
* Upload mRNA QC
* Create and upload output bag
* Add option to not upload
**Background**
* Remove (comment out) option to pull references from S3
......@@ -10,11 +15,11 @@
* Start using new gudmaprbk dockerhub (images autobuilt)
* Moved consistency checks to be fully python
* Changed order of steps so that fastqc is done after the trim step
* Change docker images to production
* Add automated version badges
**Known Bugs**
* Datahub reference pull uses dev.gudmap.org as source until references are placed on production
* outputBag does not contain fetch for processed data
* Does not include automatic data upload
* Override params (inputBag, fastq, species) aren't checked for integrity
<hr>
......
......@@ -37,9 +37,12 @@ To Run:
* **dev** = [dev.gudmap.org](dev.gudmap.org) (default, does not contain all data)
* **staging** = [staging.gudmap.org](staging.gudmap.org) (does not contain all data)
* **production** = [www.gudmap.org](www.gudmap.org) (***does contain all data***)
* `--refMoVersion` mouse reference version ***(optional)***
* `--refHuVersion` human reference version ***(optional)***
* `--refERCCVersion` human reference version ***(optional)***
* `--refMoVersion` mouse reference version ***(optional, default = 38.p6.vM22)***
* `--refHuVersion` human reference version ***(optional, default = 38.p12.v31)***
* `--refERCCVersion` ERCC spike-in reference version ***(optional, default = 92)***
* `--upload` option to upload outputs back to the data-hub ***(optional, default = true)***
* **true** = upload outputs to the data-hub
* **false** = do *NOT* upload outputs to the data-hub
* `-profile` config profile to use ***(optional)***:
* default = processes on BioHPC cluster
* **biohpc** = process on BioHPC cluster
......@@ -47,7 +50,7 @@ To Run:
* **aws_ondemand** = AWS Batch on-demand instant requests
* **aws_spot** = AWS Batch spot instance requests
* `--email` email address(es) to send failure notification (comma separated) ***(optional)***:
* e.g: `--email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'`
* e.g: `--email 'Venkat.Malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'`
* NOTES:
* once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials)
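Putting the options together, a typical invocation might look like the following (the replicate RID, credential paths, and email address are placeholders):

```bash
nextflow run ./workflow/rna-seq.nf \
  --deriva ./test_data/auth/credential.json \
  --bdbag ./test_data/auth/cookies.txt \
  --repRID Q-Y5F6 \
  --refHuVersion 38.p12.v31 \
  --upload false \
  --email 'user@example.com' \
  -profile biohpc
```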
......
docs/dag.png (image replaced: 762 KiB → 1.36 MiB)
......@@ -4,7 +4,7 @@
description: 'This section describes references for the tools used.'
plot_type: 'html'
data: |
<h3 id="references">References</h3>
<ol style="list-style-type: decimal">
<li><strong>python</strong>:</li>
......@@ -41,7 +41,7 @@
<li><strong>hisat2</strong>:</li>
</ol>
<ul>
<li>Kim ,D.,Paggi, J.M., Park, C., Bennett, C., Salzberg, S.L. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. 2019 Nat Biotechnol. 2019 Aug;37(8):907-915. doi:<a href="https://doi.org/10.1038/s41587-019-0201-4">10.1038/s41587-019-0201-4</a></li>
<li>Kim, D., Paggi, J.M., Park, C., Bennett, C., Salzberg, S.L. 2019. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. Aug;37(8):907-915. doi:<a href="https://doi.org/10.1038/s41587-019-0201-4">10.1038/s41587-019-0201-4</a>.</li>
</ul>
<ol start="7" style="list-style-type: decimal">
<li><strong>samtools</strong>:</li>
......
......@@ -6,19 +6,19 @@
description: 'are collected for pipeline version.'
data: |
<dl class="dl-horizontal">
<dt>Python</dt><dd>v3.7.7</dd>
<dt>DERIVA</dt><dd>v1.0.0</dd>
<dt>Python</dt><dd>v3.8.3</dd>
<dt>DERIVA</dt><dd>v1.3.0</dd>
<dt>BDBag</dt><dd>v1.5.6</dd>
<dt>RSeQC</dt><dd>v3.0.1</dd>
<dt>Trim Galore!</dt><dd>v0.6.4</dd>
<dt>HISAT2</dt><dd>v2.1.0</dd>
<dt>Samtools</dt><dd>v1.9</dd>
<dt>picard (MarkDuplicates)</dt><dd>v2.23.0-SNAPSHOT</dd>
<dt>featureCounts</dt><dd>v2.0.0</dd>
<dt>R</dt><dd>v3.6.3</dd>
<dt>deepTools</dt><dd>v3.3.2</dd>
<dt>RSeQC</dt><dd>v4.0.0</dd>
<dt>Trim Galore!</dt><dd>v0.6.4_dev</dd>
<dt>HISAT2</dt><dd>v2.2.1</dd>
<dt>Samtools</dt><dd>v1.11</dd>
<dt>picard (MarkDuplicates)</dt><dd>v2.23.9</dd>
<dt>featureCounts</dt><dd>v2.0.1</dd>
<dt>R</dt><dd>v4.0.3</dd>
<dt>deepTools</dt><dd>v3.5.0</dd>
<dt>FastQC</dt><dd>v0.11.9</dd>
<dt>MultiQC</dt><dd>v1.8</dd>
<dt>MultiQC</dt><dd>v1.9</dd>
<dt>Pipeline Version</dt><dd>v0.0.4_indev</dd>
</dl>
......@@ -5,52 +5,54 @@
module load singularity/3.5.3
module load pigz/2.4
ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ../test_data/
mkdir -p NEW_test_data
ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
ln -sfn ./test_data/auth/credential.json ~/.deriva/credential.json
mkdir -p ./NEW_test_data/bag
singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
cp Replicate_Q-Y5F6.zip ./NEW_test_data/bag/Replicate_Q-Y5F6.zip
singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-download-cli staging.gudmap.org --catalog 2 ../workflow/conf/Replicate_For_Input_Bag.json . rid=Q-Y5F6
cp Q-Y5F6_inputBag.zip ./NEW_test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip
mkdir -p ./NEW_test_data/fastq
unzip ./test_data/bag/Replicate_Q-Y5F6.zip
singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6
cp Replicate_Q-Y5F6.R1.fastq.gz ./NEW_test_data/fastq/Replicate_Q-Y5F6.R1.fastq.gz
cp Replicate_Q-Y5F6.R2.fastq.gz ./NEW_test_data/fastq/Replicate_Q-Y5F6.R2.fastq.gz
unzip ./NEW_test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip
singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' bash ../workflow/scripts/bdbagFetch.sh Q-Y5F6_inputBag Q-Y5F6
cp Q-Y5F6.R1.fastq.gz ./NEW_test_data/fastq/Q-Y5F6.R1.fastq.gz
cp Q-Y5F6.R2.fastq.gz ./NEW_test_data/fastq/Q-Y5F6.R2.fastq.gz
mkdir -p ./NEW_test_data/fastq/small
singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./NEW_test_data/fastq/Replicate_Q-Y5F6.R1.fastq.gz 1000000 1> Q-Y5F6_1M.R1.fastq
singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./NEW_test_data/fastq/Replicate_Q-Y5F6.R2.fastq.gz 1000000 1> Q-Y5F6_1M.R2.fastq
singularity exec 'docker://gudmaprbk/seqtk1.3:1.0.0' seqtk sample -s100 ./NEW_test_data/fastq/Q-Y5F6.R1.fastq.gz 1000000 1> Q-Y5F6_1M.R1.fastq
singularity exec 'docker://gudmaprbk/seqtk1.3:1.0.0' seqtk sample -s100 ./NEW_test_data/fastq/Q-Y5F6.R2.fastq.gz 1000000 1> Q-Y5F6_1M.R2.fastq
pigz Q-Y5F6_1M.R1.fastq
pigz Q-Y5F6_1M.R2.fastq
cp Q-Y5F6_1M.R1.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
cp Q-Y5F6_1M.R2.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
mkdir -p ./NEW_test_data/meta
singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
singularity run 'docker://gudmaprbk/trimgalore0.6.5:1.0.0' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
cp Q-Y5F6_1M.se_trimmed.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz
cp Q-Y5F6_1M.pe_R1_val_1.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz
cp Q-Y5F6_1M.pe_R2_val_2.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz
cp Q-Y5F6_1M.pe_val_1.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz
cp Q-Y5F6_1M.pe_val_2.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz
cp Q-Y5F6_1M.R1.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R1.fastq.gz_trimming_report.txt
cp Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt
touch metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Has_Strand_Specific_Information,Used_Spike_Ins,Species' > metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,no,no,Homo sapiens' >> metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Has_Strand_Specific_Information,Used_Spike_Ins,Species,Read_Length' > metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,no,no,Homo sapiens,75' >> metaTest.csv
cp metaTest.csv ./NEW_test_data/meta/metaTest.csv
mkdir -p ./NEW_test_data/bam
mkdir -p ./NEW_test_data/bam/small
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/hisat2/genome --rna-strandness F -U ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
singularity run 'docker://gudmaprbk/hisat2.2.1:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
cp Q-Y5F6_1M.se.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.bam
cp Q-Y5F6_1M.pe.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.pe.bam
cp Q-Y5F6_1M.se.sorted.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.bam
......@@ -60,18 +62,17 @@ cp Q-Y5F6_1M.pe.sorted.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.pe.sorted.bam
cp Q-Y5F6_1M.se.alignSummary.txt ./NEW_test_data/meta/Q-Y5F6_1M.se.alignSummary.txt
cp Q-Y5F6_1M.pe.alignSummary.txt ./NEW_test_data/meta/Q-Y5F6_1M.pe.alignSummary.txt
singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.deduped.bam
singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.deduped.bam
singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
cp Q-Y5F6_1M.se.deduped.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
cp Q-Y5F6_1M.se.sorted.deduped.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
cp Q-Y5F6_1M.se.sorted.deduped.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam.bai
cp Q-Y5F6_1M.se.deduped.Metrics.txt /NEW_test_data/meta/Q-Y5F6_1M.se.deduped.Metrics.txt
cp Q-Y5F6_1M.se.deduped.Metrics.txt ./NEW_test_data/meta/Q-Y5F6_1M.se.deduped.Metrics.txt
for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
done | singularity run 'docker://gudmaprbk/picard2.23.9:1.0.0' parallel -j 20 -k
cp Q-Y5F6_1M.se.sorted.deduped.chr4.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr4.bam
cp Q-Y5F6_1M.se.sorted.deduped.chr4.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr4.bam.bai
cp Q-Y5F6_1M.se.sorted.deduped.chr8.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr8.bam
......@@ -81,28 +82,30 @@ cp Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.
mkdir -p ./NEW_test_data/counts
mkdir -p ./NEW_test_data/counts/small
ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5F6_1M.se.countData
singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
cp Q-Y5F6_1M.se.featureCounts ./NEW_test_data/counts/small/Q-Y5F6_1M.se.countData
ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/geneID.tsv
ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/Entrez.tsv
singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ../workflow/scripts/calculateTPM.R --count Q-Y5F6_1M.se_countData
singularity run 'docker://gudmaprbk/subread2.0.1:1.0.0' Rscript ../workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
cp Q-Y5F6_1M.se_countData ./NEW_test_data/counts/small/Q-Y5F6_1M.se_countData
cp Q-Y5F6_1M.se.countTable.csv ./NEW_test_data/counts/small/Q-Y5F6_1M.se.countTable.csv
cp Q-Y5F6_1M.se.countTable.csv ./NEW_test_data/counts/small/Q-Y5F6_1M.se.tpmTable.csv
cp Q-Y5F6_1M.se_tpmTable.csv ./NEW_test_data/counts/small/Q-Y5F6_1M.se_tpmTable.csv
mkdir -p ./NEW_test_data/bw
mkdir -p ./NEW_test_data/bw/small
singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
singularity run 'docker://gudmaprbk/deeptools3.5.0:1.0.0' bamCoverage -p 20 -b ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
cp Q-Y5F6_1M.se.bw ./NEW_test_data/bw/small/Q-Y5F6_1M.se.bw
mkdir -p ./NEW_test_data/fastqc
mkdir -p ./NEW_test_data/fastqc/small
singularity run 'docker://bicf/fastqc:2.0.0' ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
singularity run 'docker://gudmaprbk/fastqc0.11.9:1.0.0' fastqc ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
cp Q-Y5F6_1M.R1_fastqc.html ./NEW_test_data/fastqc/small/Q-Y5F6_1M.R1_fastqc.html
cp Q-Y5F6_1M.R1_fastqc.zip ./NEW_test_data/fastqc/small/Q-Y5F6_1M.R1_fastqc.zip
echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
for i in {"chr8","chr4","chrY"}; do
echo "tin.py -i ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
echo "tin.py -i ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://gudmaprbk/rseqc4.0.0:1.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
cp Q-Y5F6_1M.se.sorted.deduped.tin.xls ./NEW_test_data/meta/Q-Y5F6_1M.se.sorted.deduped.tin.xls
chgrp -R BICF_Core ./NEW_test_data
chmod -R 750 ./NEW_test_data
{
"bag": {
"bag_name": "Execution_Run_{rid}",
"bag_algorithms": [
"md5"
],
"bag_archiver": "zip",
"bag_metadata": {}
},
"catalog": {
"catalog_id": "2",
"query_processors": [
{
"processor": "csv",
"processor_params": {
"output_path": "Execution_Run",
"query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/RID,Replicate_RID:=Replicate,Workflow_RID:=Workflow,Reference_Genone_RID:=Reference_Genome,Input_Bag_RID:=Input_Bag,Notes,Execution_Status,Execution_Status_Detail,RCT,RMT?limit=none"
}
},
{
"processor": "csv",
"processor_params": {
"output_path": "Workflow",
"query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Workflow?limit=none"
}
},
{
"processor": "csv",
"processor_params": {
"output_path": "Reference_Genome",
"query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Reference_Genome?limit=none"
}
},
{
"processor": "csv",
"processor_params": {
"output_path": "Input_Bag",
"query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Input_Bag?limit=none"
}
},
{
"processor": "csv",
"processor_params": {
"output_path": "mRNA_QC",
"query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/(RID)=(RNASeq:mRNA_QC:Execution_Run)/RID,Execution_Run_RID:=Execution_Run,Replicate_RID:=Replicate,Paired_End,Strandedness,Median_Read_Length,Raw_Count,Final_Count,Notes,RCT,RMT?limit=none"
}
},
{
"processor": "fetch",
"processor_params": {
"output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Output_Files",
"query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/(RID)=(RNASeq:Processed_File:Execution_Run)/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
}
},
{
"processor": "fetch",
"processor_params": {
"output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Input_Bag",
"query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/RNASeq:Input_Bag/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
}
}
]
}
}
\ No newline at end of file
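# A minimal usage sketch for this export config (the RID shown is the one
# hard-coded in the queries above; host and path are illustrative), mirroring
# how uploadProcessedFile later invokes it:
#   deriva-download-cli staging.gudmap.org --catalog 2 Execution_Run_For_Output_Bag.json . rid=17-BPAG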
{
"bag": {
"bag_name": "Replicate_{rid}",
"bag_name": "{rid}_inputBag",
"bag_algorithms": [
"md5"
],
......
......@@ -84,7 +84,23 @@ process {
cpus = 2
memory = '1 GB'
}
withName: outputBag {
withName: uploadInputBag {
cpus = 1
memory = '1 GB'
}
withName: uploadExecutionRun {
cpus = 1
memory = '1 GB'
}
withName: uploadQC {
cpus = 1
memory = '1 GB'
}
withName: uploadProcessedFile {
cpus = 1
memory = '1 GB'
}
withName: uploadOutputBag {
cpus = 1
memory = '1 GB'
}
......
......@@ -58,7 +58,19 @@ process {
withName: aggrQC {
executor = 'local'
}
withName: outputBag {
withName: uploadInputBag {
executor = 'local'
}
withName: uploadExecutionRun {
executor = 'local'
}
withName: uploadQC {
executor = 'local'
}
withName: uploadProcessedFile {
executor = 'local'
}
withName: uploadOutputBag {
executor = 'local'
}
}
......
......@@ -74,10 +74,14 @@ custom_data:
scale: false
format: '{}'
headers:
Session
Session ID
Pipeline Version
Input
Session:
description: ''
Session ID:
description: 'Nextflow session ID'
Pipeline Version:
description: 'BICF pipeline version'
Input:
description: 'Input overrides'
rid:
file_format: 'tsv'
section_name: 'RID'
......@@ -88,10 +92,14 @@ custom_data:
scale: false
format: '{}'
headers:
Replicate
Replicate RID
Experiment RID
Study RID
Replicate:
description: ''
Replicate RID:
description: 'Replicate RID'
Experiment RID:
description: 'Experiment RID'
Study RID:
description: 'Study RID'
meta:
file_format: 'tsv'
section_name: 'Metadata'
......@@ -102,30 +110,43 @@ custom_data:
scale: false
format: '{:,.0f}'
headers:
Source
Species
Ends
Stranded
Spike-in
Raw Reads
Assigned Reads
Median Read Length
Median TIN
Pipeline Version
Source:
description: 'Metadata source'
Species:
description: 'Species'
Ends:
description: 'Single or paired end sequencing'
Stranded:
description: 'Stranded (forward/reverse) or unstranded library prep'
Spike-in:
description: 'ERCC spike in'
Raw Reads:
description: 'Number of reads from the sequencer'
Assigned Reads:
description: 'Final read count after filtering'
Median Read Length:
description: 'Median read length'
Median TIN:
description: 'Median transcript integrity number'
ref:
file_format: 'tsv'
section_name: 'Reference'
description: 'This is the referenec version information'
description: 'This is the reference version information'
plot_type: 'table'
pconfig:
id: 'ref'
scale: false
format: '{}'
headers:
Species
Genome Reference Consortium Build
Genome Reference Consortium Patch
GENCODE Annotation Release"
Species:
description: 'Reference species'
Genome Reference Consortium Build:
description: 'Reference source build'
Genome Reference Consortium Patch:
description: 'Reference source patch version'
GENCODE Annotation Release:
description: 'Annotation release version'
tin:
file_format: 'tsv'
section_name: 'TIN'
......@@ -135,16 +156,16 @@ custom_data:
id: 'tin'
headers:
chrom
0 - 9
10 - 19
20 - 29
30 - 39
40 - 49
50 - 59
60 - 69
70 - 79
80 - 89
90 - 99
1 - 10
11 - 20
21 - 30
31 - 40
41 - 50
51 - 60
61 - 70
71 - 80
81 - 90
91 - 100
sp:
run:
......@@ -156,4 +177,4 @@ sp:
ref:
fn: 'reference.tsv'
tin:
fn: '*.tin.hist.tsv'
fn: '*_tin.hist.tsv'
......@@ -20,55 +20,67 @@ profiles {
process {
withName:getBag {
container = 'bicf/gudmaprbkfilexfer:2.0.1_indev'
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName:getData {
container = 'bicf/gudmaprbkfilexfer:2.0.1_indev'
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName: parseMetadata {
container = 'gudmaprbk/python3:1.0.0'
}
withName: trimData {
container = 'bicf/trimgalore:1.1'
container = 'gudmaprbk/trimgalore0.6.5:1.0.0'
}
withName: getRefInfer {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName: downsampleData {
container = 'bicf/seqtk:2.0.1_indev'
container = 'gudmaprbk/seqtk1.3:1.0.0'
}
withName: alignSampleData {
container = 'bicf/gudmaprbkaligner:2.0.1_indev'
container = 'gudmaprbk/hisat2.2.1:1.0.0'
}
withName: inferMetadata {
container = 'bicf/rseqc3.0:2.0.1_indev'
container = 'gudmaprbk/rseqc4.0.0:1.0.0'
}
withName: getRef {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName: alignData {
container = 'bicf/gudmaprbkaligner:2.0.1_indev'
container = 'gudmaprbk/hisat2.2.1:1.0.0'
}
withName: dedupData {
container = 'bicf/gudmaprbkdedup:2.0.0'
container = 'gudmaprbk/picard2.23.9:1.0.0'
}
withName: countData {
container = 'bicf/subread2:2.0.0'
container = 'gudmaprbk/subread2.0.1:1.0.0'
}
withName: makeBigWig {
container = 'bicf/deeptools3.3:2.0.1_indev'
container = 'gudmaprbk/deeptools3.5.0:1.0.0'
}
withName: fastqc {
container = 'bicf/fastqc:2.0.1_indev'
container = 'gudmaprbk/fastqc0.11.9:1.0.0'
}
withName: dataQC {
container = 'bicf/rseqc3.0:2.0.1_indev'
container = 'gudmaprbk/rseqc4.0.0:1.0.0'
}
withName: aggrQC {
container = 'bicf/multiqc1.8:2.0.1_indev'
container = 'gudmaprbk/multiqc1.9:1.0.0'
}
withName:uploadInputBag {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName:uploadExecutionRun {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName:outputBag {
container = 'bicf/gudmaprbkfilexfer:2.0.1_indev'
withName:uploadQC {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName:uploadProcessedFile {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
withName:uploadOutputBag {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
}
......
......@@ -18,6 +18,7 @@ params.refMoVersion = "38.p6.vM22"
params.refHuVersion = "38.p12.v31"
params.refERCCVersion = "92"
params.outDir = "${baseDir}/../output"
params.upload = true
params.email = ""
......@@ -36,6 +37,11 @@ deriva.into {
deriva_getBag
deriva_getRefInfer
deriva_getRef
deriva_uploadInputBag
deriva_uploadExecutionRun
deriva_uploadQC
deriva_uploadProcessedFile
deriva_uploadOutputBag
}
bdbag = Channel
.fromPath(params.bdbag)
......@@ -46,13 +52,15 @@ refHuVersion = params.refHuVersion
refERCCVersion = params.refERCCVersion
outDir = params.outDir
logsDir = "${outDir}/Logs"
upload = params.upload
inputBagForce = params.inputBagForce
fastqsForce = params.fastqsForce
speciesForce = params.speciesForce
email = params.email
// Define fixed files
derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
// Define fixed files and export configs
replicateExportConfig = Channel.fromPath("${baseDir}/conf/Replicate_For_Input_Bag.json")
executionRunExportConfig = Channel.fromPath("${baseDir}/conf/Execution_Run_For_Output_Bag.json")
if (params.source == "dev") {
source = "dev.gudmap.org"
} else if (params.source == "staging") {
......@@ -74,15 +82,20 @@ softwareReferences = Channel.fromPath("${baseDir}/../docs/software_references_mq
softwareVersions = Channel.fromPath("${baseDir}/../docs/software_versions_mqc.yaml")
// Define script files
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
script_parseMeta = Channel.fromPath("${baseDir}/scripts/parseMeta.py")
script_inferMeta = Channel.fromPath("${baseDir}/scripts/inferMeta.sh")
script_refDataInfer = Channel.fromPath("${baseDir}/scripts/extractRefData.py")
script_refData = Channel.fromPath("${baseDir}/scripts/extractRefData.py")
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbag_fetch.sh")
script_parseMeta = Channel.fromPath("${baseDir}/scripts/parse_meta.py")
script_inferMeta = Channel.fromPath("${baseDir}/scripts/infer_meta.sh")
script_refDataInfer = Channel.fromPath("${baseDir}/scripts/extract_ref_data.py")
script_refData = Channel.fromPath("${baseDir}/scripts/extract_ref_data.py")
script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R")
script_convertGeneSymbols = Channel.fromPath("${baseDir}/scripts/convertGeneSymbols.R")
script_tinHist = Channel.fromPath("${baseDir}/scripts/tinHist.py")
script_tinHist = Channel.fromPath("${baseDir}/scripts/tin_hist.py")
script_uploadInputBag = Channel.fromPath("${baseDir}/scripts/upload_input_bag.py")
script_uploadExecutionRun = Channel.fromPath("${baseDir}/scripts/upload_execution_run.py")
script_uploadQC = Channel.fromPath("${baseDir}/scripts/upload_qc.py")
script_uploadOutputBag = Channel.fromPath("${baseDir}/scripts/upload_output_bag.py")
script_deleteEntry_uploadQC = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
script_deleteEntry_uploadProcessedFile = Channel.fromPath("${baseDir}/scripts/delete_entry.py")
/*
* trackStart: track start of pipeline
......@@ -143,10 +156,10 @@ process getBag {
input:
path credential, stageAs: "credential.json" from deriva_getBag
path derivaConfig
path replicateExportConfig
output:
path ("Replicate_*.zip") into bag
path ("*.zip") into bag
when:
inputBagForce == ""
......@@ -164,8 +177,15 @@ process getBag {
# deriva-download replicate RID
echo -e "LOG: fetching bag for ${repRID} in GUDMAP" >> ${repRID}.getBag.log
deriva-download-cli staging.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID}
deriva-download-cli ${source} --catalog 2 ${replicateExportConfig} . rid=${repRID}
echo -e "LOG: fetched" >> ${repRID}.getBag.log
name=\$(ls *.zip)
name=\$(basename \${name} | cut -d "." -f1)
yr=\$(date +'%Y')
mn=\$(date +'%m')
dy=\$(date +'%d')
mv \${name}.zip \${name}_\${yr}\${mn}\${dy}.zip
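# e.g. Q-Y5F6_inputBag.zip downloaded on 2020-11-30 (date illustrative) becomes
# Q-Y5F6_inputBag_20201130.zip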
"""
}
......@@ -177,6 +197,10 @@ if (inputBagForce != "") {
} else {
inputBag = bag
}
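// Replicate the input bag for multiple process inputs (DSL1 .into forks a
// channel so each downstream consumer receives its own copy)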
inputBag.into {
inputBag_getData
inputBag_uploadInputBag
}
/*
* getData: fetch study files from consortium with downloaded bdbag.zip
......@@ -187,7 +211,7 @@ process getData {
input:
path script_bdbagFetch
path cookies, stageAs: "deriva-cookies.txt" from bdbag
path inputBag
path inputBag from inputBag_getData
output:
path ("*.R{1,2}.fastq.gz") into fastqs
......@@ -207,7 +231,7 @@ process getData {
echo -e "LOG: linked" >> ${repRID}.getData.log
# get bag basename
replicate=\$(basename "${inputBag}" | cut -d "." -f1)
replicate=\$(basename "${inputBag}")
echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log
# unzip bag
......@@ -217,7 +241,7 @@ process getData {
# bag fetch fastq's only and rename by repRID
echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
sh ${script_bdbagFetch} \${replicate} ${repRID}
sh ${script_bdbagFetch} \${replicate::-13} ${repRID}
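# \${replicate::-13} strips the trailing "_YYYYMMDD.zip" (13 characters) that
# getBag appended to the input bag name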
echo -e "LOG: fetched" >> ${repRID}.getData.log
"""
}
......@@ -249,7 +273,7 @@ process parseMetadata {
path experiment from experimentMeta
output:
path "design.csv" into metadata
path "design.csv" into metadata_fl
script:
"""
......@@ -310,7 +334,7 @@ speciesMeta = Channel.create()
readLengthMeta = Channel.create()
expRID = Channel.create()
studyRID = Channel.create()
metadata.splitCsv(sep: ",", header: false).separate(
metadata_fl.splitCsv(sep: ",", header: false).separate(
endsMeta,
endsManual,
strandedMeta,
......@@ -320,6 +344,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
expRID,
studyRID
)
// Replicate metadata for multiple process inputs
endsManual.into {
endsManual_trimData
......@@ -327,6 +352,16 @@ endsManual.into {
endsManual_alignSampleData
endsManual_aggrQC
}
studyRID.into {
studyRID_aggrQC
studyRID_uploadInputBag
studyRID_uploadProcessedFile
studyRID_uploadOutputBag
}
expRID.into {
expRID_aggrQC
expRID_uploadProcessedFile
}
/*
......@@ -336,14 +371,14 @@ process trimData {
tag "${repRID}"
input:
val ends from endsManual_trimData
path (fastq) from fastqs_trimData
val ends from endsManual_trimData
output:
path ("*.fq.gz") into fastqsTrim
path ("*.fastq.gz", includeInputs:true) into fastqs_fastqc
path ("*_trimming_report.txt") into trimQC
path ("readLength.csv") into inferMetadata_readLength
path ("readLength.csv") into readLengthInfer_fl
script:
"""
......@@ -371,11 +406,16 @@ process trimData {
// Extract calculated read length metadata into channel
readLengthInfer = Channel.create()
inferMetadata_readLength.splitCsv(sep: ",", header: false).separate(
readLengthInfer_fl.splitCsv(sep: ",", header: false).separate(
readLengthInfer
)
// Replicate trimmed fastq's
// Replicate inferred read length for multiple process inputs
readLengthInfer.into {
readLengthInfer_aggrQC
readLengthInfer_uploadQC
}
// Replicate trimmed fastq's for multiple process inputs
fastqsTrim.into {
fastqsTrim_alignData
fastqsTrim_downsampleData
......@@ -450,9 +490,9 @@ process getRefInfer {
query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version=${refName}${refERCCVersion}/Annotation_Version=${refName}${refERCCVersion}')
fi
curl --request GET \${query} > refQuery.json
refURL=\$(python extractRefData.py --returnParam URL)
refURL=\$(python ${script_refDataInfer} --returnParam URL)
loc=\$(dirname \${refURL})
fName=\$(python extractRefData.py --returnParam fName)
fName=\$(python ${script_refDataInfer} --returnParam fName)
fName=\${fName%.*}
if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
......@@ -483,8 +523,8 @@ process downsampleData {
tag "${repRID}"
input:
val ends from endsManual_downsampleData
path fastq from fastqsTrim_downsampleData
val ends from endsManual_downsampleData
output:
path ("sampled.1.fq") into fastqs1Sample
......@@ -575,7 +615,7 @@ process inferMetadata {
path alignSummary from alignSampleQC_inferMetadata.collect()
output:
path "infer.csv" into inferMetadata
path "infer.csv" into inferMetadata_fl
path "${repRID}.infer_experiment.txt" into inferExperiment
script:
......@@ -642,18 +682,18 @@ process inferMetadata {
infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.infer_experiment.txt
echo -e "LOG: infered" >> ${repRID}.inferMetadata.log
ended=`bash inferMeta.sh endness ${repRID}.infer_experiment.txt`
fail=`bash inferMeta.sh fail ${repRID}.infer_experiment.txt`
ended=`bash ${script_inferMeta} endness ${repRID}.infer_experiment.txt`
fail=`bash ${script_inferMeta} fail ${repRID}.infer_experiment.txt`
if [ \${ended} == "PairEnd" ]
then
ends="pe"
percentF=`bash inferMeta.sh pef ${repRID}.infer_experiment.txt`
percentR=`bash inferMeta.sh per ${repRID}.infer_experiment.txt`
percentF=`bash ${script_inferMeta} pef ${repRID}.infer_experiment.txt`
percentR=`bash ${script_inferMeta} per ${repRID}.infer_experiment.txt`
elif [ \${ended} == "SingleEnd" ]
then
ends="se"
percentF=`bash inferMeta.sh sef ${repRID}.infer_experiment.txt`
percentR=`bash inferMeta.sh ser ${repRID}.infer_experiment.txt`
percentF=`bash ${script_inferMeta} sef ${repRID}.infer_experiment.txt`
percentR=`bash ${script_inferMeta} ser ${repRID}.infer_experiment.txt`
fi
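# infer_experiment.txt typically holds lines like the following (values are
# illustrative):
#   This is PairEnd Data
#   Fraction of reads explained by "1++,1--,2+-,2-+": 0.4903
# which infer_meta.sh greps to classify endedness and the strand fractions.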
echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
......@@ -684,7 +724,7 @@ align_moInfer = Channel.create()
percentFInfer = Channel.create()
percentRInfer = Channel.create()
failInfer = Channel.create()
inferMetadata.splitCsv(sep: ",", header: false).separate(
inferMetadata_fl.splitCsv(sep: ",", header: false).separate(
endsInfer,
strandedInfer,
spikeInfer,
......@@ -703,20 +743,24 @@ endsInfer.into {
endsInfer_countData
endsInfer_dataQC
endsInfer_aggrQC
endsInfer_uploadQC
}
strandedInfer.into {
strandedInfer_alignData
strandedInfer_countData
strandedInfer_aggrQC
strandedInfer_uploadQC
}
spikeInfer.into{
spikeInfer_getRef
spikeInfer_aggrQC
spikeInfer_uploadExecutionRun
}
speciesInfer.into {
speciesInfer_getRef
speciesInfer_aggrQC
speciesInfer_outputBag
speciesInfer_uploadExecutionRun
speciesInfer_uploadProcessedFile
}
......@@ -727,8 +771,8 @@ process getRef {
tag "${species}"
input:
path credential, stageAs: "credential.json" from deriva_getRef
path script_refData
path credential, stageAs: "credential.json" from deriva_getRef
val spike from spikeInfer_getRef
val species from speciesInfer_getRef
......@@ -796,9 +840,9 @@ process getRef {
GENCODE=\$(echo \${references} | grep -o \${refName}.* | cut -d '.' -f3)
query=\$(echo 'https://${referenceBase}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
curl --request GET \${query} > refQuery.json
refURL=\$(python extractRefData.py --returnParam URL)
refURL=\$(python ${script_refData} --returnParam URL)
loc=\$(dirname \${refURL})
fName=\$(python extractRefData.py --returnParam fName)
fName=\$(python ${script_refData} --returnParam fName)
fName=\${fName%.*}
if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
......@@ -824,10 +868,10 @@ process alignData {
tag "${repRID}"
input:
val ends from endsInfer_alignData
val stranded from strandedInfer_alignData
path fastq from fastqsTrim_alignData
path reference_alignData
val ends from endsInfer_alignData
val stranded from strandedInfer_alignData
output:
tuple path ("${repRID}.sorted.bam"), path ("${repRID}.sorted.bam.bai") into rawBam
......@@ -897,8 +941,8 @@ process dedupData {
tuple path (bam), path (bai) from rawBam_dedupData
output:
tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam
tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam
tuple path ("${repRID}_sorted.deduped.bam"), path ("${repRID}_sorted.deduped.bam.bai") into dedupBam
tuple path ("${repRID}_sorted.deduped.*.bam"), path ("${repRID}_sorted.deduped.*.bam.bai") into dedupChrBam
path ("*.deduped.Metrics.txt") into dedupQC
script:
......@@ -913,16 +957,16 @@ process dedupData {
# sort the bam file using Samtools
echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log
samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam
samtools sort -@ `nproc` -O BAM -o ${repRID}_sorted.deduped.bam ${repRID}.deduped.bam
# index the sorted bam using Samtools
echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log
samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai
samtools index -@ `nproc` -b ${repRID}_sorted.deduped.bam ${repRID}_sorted.deduped.bam.bai
# split the deduped BAM file for multi-threaded tin calculation
for i in `samtools view ${repRID}.sorted.deduped.bam | cut -f3 | sort | uniq`;
for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | sort | uniq`;
do
echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}.sorted.deduped.bam \${i} 1>> ${repRID}.sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}.sorted.deduped.\${i}.bam ${repRID}.sorted.deduped.\${i}.bam.bai"
echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}_sorted.deduped.bam \${i} 1>> ${repRID}_sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}_sorted.deduped.\${i}.bam ${repRID}_sorted.deduped.\${i}.bam.bai"
done | parallel -j `nproc` -k
"""
}
......@@ -932,6 +976,7 @@ dedupBam.into {
dedupBam_countData
dedupBam_makeBigWig
dedupBam_dataQC
dedupBam_uploadProcessedFile
}
/*
......@@ -945,7 +990,7 @@ process makeBigWig {
tuple path (bam), path (bai) from dedupBam_makeBigWig
output:
path ("${repRID}.bw")
path ("${repRID}_sorted.deduped.bw") into bigwig
script:
"""
......@@ -954,7 +999,7 @@ process makeBigWig {
# create bigwig
echo -e "LOG: creating bibWig" >> ${repRID}.makeBigWig.log
bamCoverage -p `nproc` -b ${bam} -o ${repRID}.bw
bamCoverage -p `nproc` -b ${bam} -o ${repRID}_sorted.deduped.bw
echo -e "LOG: created" >> ${repRID}.makeBigWig.log
"""
}
......@@ -964,7 +1009,7 @@ process makeBigWig {
*/
process countData {
tag "${repRID}"
publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*.tpmTable.csv"
publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*_tpmTable.csv"
input:
path script_calculateTPM
......@@ -975,9 +1020,9 @@ process countData {
val stranded from strandedInfer_countData
output:
path ("*.tpmTable.csv") into counts
path ("*.countData.summary") into countsQC
path ("assignedReads.csv") into inferMetadata_assignedReads
path ("*_tpmTable.csv") into counts
path ("*_countData.summary") into countsQC
path ("assignedReads.csv") into assignedReadsInfer_fl
script:
"""
......@@ -1004,32 +1049,38 @@ process countData {
echo -e "LOG: counting ${ends} features" >> ${repRID}.countData.log
if [ "${ends}" == "se" ]
then
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam
elif [ "${ends}" == "pe" ]
then
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam
fi
echo -e "LOG: counted" >> ${repRID}.countData.log
# extract assigned reads
grep -m 1 'Assigned' *.countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv
grep -m 1 'Assigned' *_countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv
# calculate TPM from the resulting countData table
echo -e "LOG: calculating TPM with R" >> ${repRID}.countData.log
Rscript calculateTPM.R --count "${repRID}.countData"
Rscript ${script_calculateTPM} --count "${repRID}_countData"
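# TPM is conventionally computed as: rate_g = count_g / gene_length_g, then
# TPM_g = rate_g / sum(rate) * 1e6; calculateTPM.R's exact implementation is
# not shown in this diff.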
# convert gene symbols to Entrez id's
echo -e "LOG: convert gene symbols to Entrez id's" >> ${repRID}.countData.log
Rscript convertGeneSymbols.R --repRID "${repRID}"
Rscript ${script_convertGeneSymbols} --repRID "${repRID}"
"""
}
// Extract number of assigned reads metadata into channel
assignedReadsInfer = Channel.create()
inferMetadata_assignedReads.splitCsv(sep: ",", header: false).separate(
assignedReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
assignedReadsInfer
)
// Replicate inferred assigned reads for multiple process inputs
assignedReadsInfer.into {
assignedReadsInfer_aggrQC
assignedReadsInfer_uploadQC
}
/*
*fastqc: run fastqc on untrimmed fastq's
*/
......@@ -1041,7 +1092,7 @@ process fastqc {
output:
path ("*_fastqc.zip") into fastqc
path ("rawReads.csv") into inferMetadata_rawReads
path ("rawReads.csv") into rawReadsInfer_fl
script:
"""
......@@ -1059,10 +1110,16 @@ process fastqc {
// Extract number of raw reads metadata into channel
rawReadsInfer = Channel.create()
inferMetadata_rawReads.splitCsv(sep: ",", header: false).separate(
rawReadsInfer_fl.splitCsv(sep: ",", header: false).separate(
rawReadsInfer
)
// Replicate inferred raw reads for multiple process inputs
rawReadsInfer.into {
rawReadsInfer_aggrQC
rawReadsInfer_uploadQC
}
/*
*dataQC: calculate and bin transcript integrity numbers (TIN), and calculate the inner distance of PE replicates
*/
......@@ -1077,9 +1134,9 @@ process dataQC {
val ends from endsInfer_dataQC
output:
path "${repRID}.tin.hist.tsv" into tinHist
path "${repRID}.tin.med.csv" into inferMetadata_tinMed
path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance
path "${repRID}_tin.hist.tsv" into tinHist
path "${repRID}_tin.med.csv" into tinMedInfer_fl
path "${repRID}_insertSize.inner_distance_freq.txt" into innerDistance
script:
"""
......@@ -1087,10 +1144,10 @@ process dataQC {
ulimit -a >> ${repRID}.dataQC.log
# calculate TIN values per feature on each chromosome
echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}.sorted.deduped.tin.xls
echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}_sorted.deduped.tin.xls
for i in `cat ./bed/genome.bed | cut -f1 | sort | uniq`; do
echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}.sorted.deduped.\${i}.bam -r ./bed/genome.bed; cat ${repRID}.sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
done | parallel -j `nproc` -k 1>> ${repRID}.sorted.deduped.tin.xls
echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}_sorted.deduped.\${i}.bam -r ./bed/genome.bed; cat ${repRID}_sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
done | parallel -j `nproc` -k 1>> ${repRID}_sorted.deduped.tin.xls
# bin TIN values
echo -e "LOG: binning TINs" >> ${repRID}.dataQC.log
......@@ -1101,19 +1158,19 @@ process dataQC {
if [ "${ends}" == "pe" ]
then
echo -e "LOG: calculating inner distances for ${ends}" >> ${repRID}.dataQC.log
inner_distance.py -i "${bam}" -o ${repRID}.insertSize -r ./bed/genome.bed
inner_distance.py -i "${bam}" -o ${repRID}_insertSize -r ./bed/genome.bed
echo -e "LOG: calculated" >> ${repRID}.dataQC.log
elif [ "${ends}" == "se" ]
then
echo -e "LOG: creating dummy inner distance file for ${ends}" >> ${repRID}.dataQC.log
touch ${repRID}.insertSize.inner_distance_freq.txt
touch ${repRID}_insertSize.inner_distance_freq.txt
fi
"""
}
// Extract median TIN metadata into channel
tinMedInfer = Channel.create()
inferMetadata_tinMed.splitCsv(sep: ",", header: false).separate(
tinMedInfer_fl.splitCsv(sep: ",", header: false).separate(
tinMedInfer
)
......@@ -1149,12 +1206,12 @@ process aggrQC {
val spikeI from spikeInfer_aggrQC
val speciesI from speciesInfer_aggrQC
val readLengthM from readLengthMeta
val readLengthI from readLengthInfer
val rawReadsI from rawReadsInfer
val assignedReadsI from assignedReadsInfer
val readLengthI from readLengthInfer_aggrQC
val rawReadsI from rawReadsInfer_aggrQC
val assignedReadsI from assignedReadsInfer_aggrQC
val tinMedI from tinMedInfer
val expRID
val studyRID
val studyRID from studyRID_aggrQC
val expRID from expRID_aggrQC
output:
path "${repRID}.multiqc.html" into multiqc
......@@ -1226,24 +1283,270 @@ process aggrQC {
"""
}
/*
* uploadInputBag: uploads the input bag
*/
process uploadInputBag {
tag "${repRID}"
input:
path script_uploadInputBag
path credential, stageAs: "credential.json" from deriva_uploadInputBag
path inputBag from inputBag_uploadInputBag
val studyRID from studyRID_uploadInputBag
output:
path ("inputBagRID.csv") into inputBagRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadInputBag.log
ulimit -a >> ${repRID}.uploadInputBag.log
yr=\$(date +'%Y')
mn=\$(date +'%m')
dy=\$(date +'%d')
file=\$(basename -a ${inputBag})
md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
size=\$(wc -c < ./\${file})
echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
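# ermrest returns a JSON array; "[]" means no Input_Bag with this MD5 has been
# uploaded yet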
if [ "\${exist}" == "[]" ]
then
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
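# the next line trims the leading '"cookie": "' (11 characters) and the
# trailing quote, leaving the bare webauthn cookie value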
cookie=\${cookie:11:-1}
loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
inputBag_rid=\$(python3 ${script_uploadInputBag} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
rid=\${inputBag_rid}
else
exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
exist=\${exist:7:-6}
echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
rid=\${exist}
fi
echo \${rid} > inputBagRID.csv
"""
}
// Extract input bag RID into channel
inputBagRID = Channel.create()
inputBagRID_fl.splitCsv(sep: ",", header: false).separate(
inputBagRID
)
/*
* uploadExecutionRun: uploads the execution run
*/
process uploadExecutionRun {
tag "${repRID}"
input:
path script_uploadExecutionRun
path credential, stageAs: "credential.json" from deriva_uploadExecutionRun
val spike from spikeInfer_uploadExecutionRun
val species from speciesInfer_uploadExecutionRun
val inputBagRID
output:
path ("executionRunRID.csv") into executionRunRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadExecutionRun.log
ulimit -a >> ${repRID}.uploadExecutionRun.log
echo LOG: searching for workflow RID - BICF mRNA ${workflow.manifest.version} >> ${repRID}.uploadExecutionRun.log
workflow=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Workflow/Name=BICF%20mRNA%20Replicate/Version=${workflow.manifest.version})
workflow=\$(echo \${workflow} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
workflow=\${workflow:7:-6}
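# the grep/substring pair above isolates the bare RID between the 7-character
# '"RID":"' prefix and the 6-character '","RCT' suffix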
echo LOG: workflow RID extracted - \${workflow} >> ${repRID}.uploadExecutionRun.log
if [ "${species}" == "Homo sapiens" ]
then
genomeName=\$(echo GRCh${refHuVersion})
elif [ "${species}" == "Mus musculus" ]
then
genomeName=\$(echo GRCm${refMoVersion})
fi
if [ "${spike}" == "yes" ]
then
genomeName=\$(echo \${genomeName}-S)
fi
echo LOG: searching for genome name - \${genomeName} >> ${repRID}.uploadExecutionRun.log
genome=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Name=\${genomeName}_indev)
genome=\$(echo \${genome} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
genome=\${genome:7:-6}
echo LOG: genome RID extracted - \${genome} >> ${repRID}.uploadExecutionRun.log
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Execution_Run/Workflow=\${workflow}/Replicate=${repRID}/Input_Bag=${inputBagRID})
echo \${exist} >> ${repRID}.uploadExecutionRun.log
if [ "\${exist}" == "[]" ]
then
executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u F)
echo LOG: execution run RID uploaded - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
else
rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
rid=\${rid:7:-6}
echo \${rid} >> ${repRID}.uploadExecutionRun.log
executionRun_rid=\$(python3 ${script_uploadExecutionRun} -r ${repRID} -w \${workflow} -g \${genome} -i ${inputBagRID} -s In-progress -d 'Run in process' -o ${source} -c \${cookie} -u \${rid})
echo LOG: execution run RID updated - \${executionRun_rid} >> ${repRID}.uploadExecutionRun.log
fi
echo \${executionRun_rid} > executionRunRID.csv
"""
}
// Extract execution run RID into channel
executionRunRID = Channel.create()
executionRunRID_fl.splitCsv(sep: ",", header: false).separate(
executionRunRID
)
// Replicate execution run RID for multiple process inputs
executionRunRID.into {
executionRunRID_uploadQC
executionRunRID_uploadProcessedFile
executionRunRID_uploadOutputBag
}
/*
* uploadQC: uploads the mRNA QC
*/
process uploadQC {
tag "${repRID}"
input:
path script_deleteEntry_uploadQC
path script_uploadQC
path credential, stageAs: "credential.json" from deriva_uploadQC
val executionRunRID from executionRunRID_uploadQC
val ends from endsInfer_uploadQC
val stranded from strandedInfer_uploadQC
val length from readLengthInfer_uploadQC
val rawCount from rawReadsInfer_uploadQC
val finalCount from assignedReadsInfer_uploadQC
output:
path ("qcRID.csv") into qcRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadQC.log
ulimit -a >> ${repRID}.uploadQC.log
if [ "${ends}" == "pe" ]
then
end="Paired End"
elif [ "${ends}" == "se" ]
then
end="Single Read"
fi
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=${repRID})
if [ "\${exist}" != "[]" ]
then
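# RIDs are 7 characters here; capture the 7 characters following each
# '"RID":"' occurrence, yielding one RID per line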
rids=\$(echo \${exist} | grep -o '\\"RID\\":\\".\\{7\\}' | sed 's/^.\\{7\\}//')
for rid in \${rids}
do
python3 ${script_deleteEntry_uploadQC} -r \${rid} -t mRNA_QC -o ${source} -c \${cookie}
echo LOG: old mRNA QC RID deleted - \${rid} >> ${repRID}.uploadQC.log
done
echo LOG: all old mRNA QC RIDs deleted >> ${repRID}.uploadQC.log
fi
qc_rid=\$(python3 ${script_uploadQC} -r ${repRID} -e ${executionRunRID} -p "\${end}" -s ${stranded} -l ${length} -w ${rawCount} -f ${finalCount} -o ${source} -c \${cookie} -u F)
echo LOG: mRNA QC RID uploaded - \${qc_rid} >> ${repRID}.uploadQC.log
echo \${qc_rid} > qcRID.csv
"""
}
// Extract mRNA qc RID into channel
qcRID = Channel.create()
qcRID_fl.splitCsv(sep: ",", header: false).separate(
qcRID
)
/*
* uploadProcessedFile: upload processed files and create the output bag
*/
process outputBag {
process uploadProcessedFile {
tag "${repRID}"
publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip"
input:
path script_deleteEntry_uploadProcessedFile
path credential, stageAs: "credential.json" from deriva_uploadProcessedFile
path executionRunExportConfig
path multiqc
path multiqcJSON
val species from speciesInfer_outputBag
tuple path (bam),path (bai) from dedupBam_uploadProcessedFile
path bigwig
path counts
val species from speciesInfer_uploadProcessedFile
val studyRID from studyRID_uploadProcessedFile
val expRID from expRID_uploadProcessedFile
val executionRunRID from executionRunRID_uploadProcessedFile
output:
path ("Replicate_*.zip") into outputBag
path ("${repRID}_Output_Bag.zip") into outputBag
when:
upload
script:
"""
mkdir Replicate_${repRID}.outputBag
hostname > ${repRID}.outputBag.log
ulimit -a >> ${repRID}.outputBag.log
mkdir -p ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${bam} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${bai} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${bigwig} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cp ${counts} ./deriva/Seq/pipeline/${studyRID}/${executionRunRID}/
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=${repRID})
if [ "\${exist}" != "[]" ]
then
rids=\$(echo \${exist} | grep -o '\\"RID\\":\\".\\{7\\}' | sed 's/^.\\{7\\}//')
for rid in \${rids}
do
python3 ${script_deleteEntry_uploadProcessedFile} -r \${rid} -t Processed_File -o ${source} -c \${cookie}
done
echo LOG: all old processed file RIDs deleted >> ${repRID}.outputBag.log
fi
deriva-upload-cli --catalog 2 --token \${cookie:9} ${source} ./deriva
echo LOG: processed files uploaded >> ${repRID}.outputBag.log
deriva-download-cli --catalog 2 --token \${cookie:9} ${source} ${executionRunExportConfig} . rid=${executionRunRID}
echo LOG: execution run bag downloaded >> ${repRID}.outputBag.log
echo -e "### Run Details" >> runDetails.md
echo -e "**Workflow URL:** https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq" >> runDetails.md
echo -e "**Workflow Version:** ${workflow.manifest.version}" >> runDetails.md
......@@ -1260,13 +1563,85 @@ process outputBag {
echo -e "**Genome Assembly Version:** \${genome} patch \${patch}" >> runDetails.md
echo -e "**Annotation Version:** GENCODE release \${annotation}" >> runDetails.md
echo -e "**Run ID:** ${repRID}" >> runDetails.md
cp runDetails.md Replicate_${repRID}.outputBag
cp ${multiqc} Replicate_${repRID}.outputBag
cp ${multiqcJSON} Replicate_${repRID}.outputBag
bdbag Replicate_${repRID}.outputBag --archiver zip
echo LOG: runDetails.md created >> ${repRID}.outputBag.log
unzip Execution_Run_${executionRunRID}.zip
yr=\$(date +'%Y')
mn=\$(date +'%m')
dy=\$(date +'%d')
mv Execution_Run_${executionRunRID} ${repRID}_Output_Bag
loc=./${repRID}_Output_Bag/data/assets/Study/${studyRID}/Experiment/${expRID}/Replicate/${repRID}/Execution_Run/${executionRunRID}/Output_Files/
mkdir -p \${loc}
cp runDetails.md \${loc}
cp ${multiqc} \${loc}
cp ${multiqcJSON} \${loc}
bdbag ./${repRID}_Output_Bag/ --update --archiver zip --debug
echo LOG: output bag created >> ${repRID}.outputBag.log
"""
}
/*
* uploadOutputBag: uploads the output bag
*/
process uploadOutputBag {
tag "${repRID}"
input:
path script_uploadOutputBag
path credential, stageAs: "credential.json" from deriva_uploadOutputBag
path outputBag
val studyRID from studyRID_uploadOutputBag
val executionRunRID from executionRunRID_uploadOutputBag
output:
path ("outputBagRID.csv") into outputBagRID_fl
when:
upload
script:
"""
hostname > ${repRID}.uploadOutputBag.log
ulimit -a >> ${repRID}.uploadOutputBag.log
yr=\$(date +'%Y')
mn=\$(date +'%m')
dy=\$(date +'%d')
file=\$(basename -a ${outputBag})
md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
echo LOG: ${repRID} output bag md5 sum - \${md5} >> ${repRID}.uploadOutputBag.log
size=\$(wc -c < ./\${file})
echo LOG: ${repRID} output bag size - \${size} bytes >> ${repRID}.uploadOutputBag.log
exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_MD5=\${md5})
if [ "\${exist}" == "[]" ]
then
cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
cookie=\${cookie:11:-1}
loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/output_bag/study/${studyRID}/replicate/${repRID}/\${file} --parents)
outputBag_rid=\$(python3 ${script_uploadOutputBag} -e ${executionRunRID} -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
echo LOG: output bag RID uploaded - \${outputBag_rid} >> ${repRID}.uploadOutputBag.log
rid=\${outputBag_rid}
else
exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
exist=\${exist:7:-6}
echo LOG: output bag RID already exists - \${exist} >> ${repRID}.uploadOutputBag.log
rid=\${exist}
fi
echo \${rid} > outputBagRID.csv
"""
}
// Extract output bag RID into channel
outputBagRID = Channel.create()
outputBagRID_fl.splitCsv(sep: ",", header: false).separate(
outputBagRID
)
workflow.onError = {
subject = "$workflow.manifest.name FAILED: $params.repRID"
......
File moved
......@@ -23,4 +23,4 @@ output <- merge(x=convert,y=countTable[,c("gene_name","gene_id","count","tpm")],
colnames(output) <- c("GENCODE_Gene_Symbol","NCBI_GeneID","Ensembl_GeneID","count","tpm")
output <- output[,c(1,3,2,4:5)]
write.table(output,file=paste0(opt$repRID,".tpmTable.csv"),sep=",",row.names=FALSE,quote=FALSE)
write.table(output,file=paste0(opt$repRID,"_tpmTable.csv"),sep=",",row.names=FALSE,quote=FALSE)
import argparse
from deriva.core import ErmrestCatalog, get_credential, BaseCLI
import sys
import csv
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('-r', '--RID', help="replicate RID", required=True)
parser.add_argument('-t', '--table', help="source table", choices=['mRNA_QC', 'Processed_File'], required=True)
parser.add_argument('-o', '--host', help="datahub host", required=True)
parser.add_argument('-c', '--cookie', help="cookie token", required=True)
args = parser.parse_args()
return args
def main(hostname, catalog_number, credential):
catalog = ErmrestCatalog('https', hostname, catalog_number, credential)
pb = catalog.getPathBuilder()
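# resolve the datapath for the table that holds the entry to be deleted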
if args.table == 'mRNA_QC':
run_table = pb.RNASeq.mRNA_QC
elif args.table == "Processed_File":
run_table = pb.RNASeq.Processed_File
path = run_table.filter(run_table.RID == args.RID)
path.delete()
rid = args.RID
print(rid + " deleted")
if __name__ == '__main__':
args = get_args()
cli = BaseCLI("Custom RNASeq query", None, 1)
cli.remove_options(["--config-file"])
host = args.host
credentials = {"cookie": args.cookie}
main(host, 2, credentials)
\ No newline at end of file
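# A minimal invocation sketch (RID and cookie values are illustrative),
# matching how uploadQC calls this script inside the pipeline:
#   python3 delete_entry.py -r 1A-2BCD -t mRNA_QC -o staging.gudmap.org -c "<webauthn cookie>"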
......@@ -38,7 +38,7 @@ SOFTWARE_REGEX = {
'Trim Galore!': ['version_trimgalore.txt', r"version (\S+)"],
'HISAT2': ['version_hisat2.txt', r"version (\S+)"],
'Samtools': ['version_samtools.txt', r"samtools (\S+)"],
'picard (MarkDuplicates)': ['version_markdups.txt', r"(\S\.\S{2}\.\S+)"],
'picard (MarkDuplicates)': ['version_markdups.txt', r"Version:(\S+)"],
'featureCounts': ['version_featurecounts.txt', r"featureCounts v(\S+)"],
'R': ['version_r.txt', r"R version (\S+)"],
'deepTools': ['version_deeptools.txt', r"deeptools (\S+)"],
......
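# The updated picard regex targets the first line of `picard MarkDuplicates
# --version` output, which looks like (sample line assumed): Version:2.23.9
#   grep -oP "Version:\K\S+" version_markdups.txt   # -> 2.23.9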