diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9a846f6e432738ec3ec1a11d9dcf4bca2d473ed1..1cf1103821fac0e31e30bafbb9901591bdf8c919 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,6 +14,11 @@ stages:
 
 getBag:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
   - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
@@ -21,14 +26,24 @@ getBag:
 
 getData:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
-  - unzip ./test_data/bagit/Replicate_Q-Y5F6.zip
+  - unzip ./test_data/bag/Replicate_Q-Y5F6.zip
   - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST
   - pytest -m getData
 
 parseMetadata:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - rep=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
   - exp=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
@@ -44,6 +59,11 @@ parseMetadata:
 
 inferMetadata:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - >
     align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
@@ -56,6 +76,11 @@ inferMetadata:
 
 getRef:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - mkdir -p hu
   - mkdir -p mo
@@ -64,6 +89,11 @@ getRef:
 
 trimData:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
   - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
@@ -73,12 +103,22 @@ trimData:
 
 downsampleData:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
   - pytest -m downsampleData
 
 alignData:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
@@ -92,6 +132,11 @@ alignData:
 
 dedupData:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
   - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
@@ -104,26 +149,49 @@ dedupData:
 
 countData:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
-  - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam 
-  - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.featureCounts
+  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
+  - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
+  - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam 
+  - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
+  - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
   - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
   - pytest -m makeFeatureCounts
 
 makeBigWig:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
   - pytest -m makeBigWig
 
 fastqc:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
   - pytest -m fastqc
 
 dataQC:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - echo -e  "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
   - for i in {"chr8","chr4","chrY"}; do
@@ -132,6 +200,11 @@ dataQC:
 
 outputBag:
   stage: unit
+  only:
+    - push
+    - tags
+  except:
+    - merge_requests
   script:
   - mkdir test
   - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag test --archiver zip
@@ -140,6 +213,10 @@ outputBag:
 
 integration_se:
   stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
   - hostname
   - ulimit -a
@@ -150,11 +227,20 @@ integration_se:
     when: always
     paths:
       - output/qc/
+      - output/report/
       - SE_multiqc_data.json
     expire_in: 7 days
+  retry:
+    max: 1
+    when:
+      - always
 
 integration_pe:
   stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
   - hostname
   - ulimit -a
@@ -166,11 +252,87 @@ integration_pe:
     paths:
       - dag.png
       - output/qc/
+      - output/report/
       - PE_multiqc_data.json
     expire_in: 7 days
+  retry:
+    max: 1
+    when:
+      - always
+
+override_inputBag:
+  stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+  script:
+  - hostname
+  - ulimit -a
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/Replicate_Q-Y5F6.zip --ci true
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - inputBagOverride_PE_multiqc_data.json
+    expire_in: 7 days
+  retry:
+    max: 1
+    when:
+      - always
+
+override_fastq:
+  stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+  script:
+  - hostname
+  - ulimit -a
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - fastqOverride_PE_multiqc_data.json
+    expire_in: 7 days
+  retry:
+    max: 1
+    when:
+      - always
+
+override_species:
+  stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+  script:
+  - hostname
+  - ulimit -a
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --ci true
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - speciesOverride_PE_multiqc_data.json
+    expire_in: 7 days
+  retry:
+    max: 1
+    when:
+      - always
+
 
 consistency:
   stage: consistency
+  only: [merge_requests]
+  except:
+    variables:
+        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
   script:
   - grep -m 1 \"Assigned\":.[0-9] SE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedSE.txt
   - grep -m 1 \"Assigned\":.[0-9] PE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedPE.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ee3ac2203267513b79b841638ae0f717e601b2aa..d955884646b77d5c5f43f1517a0aa3be8e505f08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,32 @@
-# v0.0.2 (in development)
+# v0.0.3 (in development)
+**User Facing**
+* TPM table:
+  * Add Ensembl Gene ID
+  * Rename columns: *GENCODE_Gene_Symbol*, *Ensembl_GeneID*, *NCBI_GeneID*
+* MultiQC output custom tables (html+JSON):
+  * Run table: *Session ID* and *Pipeline Version*
+  * Reference Table: *Species*, *Genome Reference Consortium Build*, *Genome Reference Consortium Patch*, *GENCODE Annotation Release* (outputs both human and mouse versions)
+* Add inputBag override param (`inputBagForce`) [`*.zip`]
+  * Uses provided inputBag instead of downloading from data-hub
+  * Still requires matching repRID input param
+* Add fastq override param (`fastqsForce`) [`R1`,`R2`]
+  * Uses provided fastq instead of downloading from data-hub
+  * Still requires matching repRID input param and will pull inputBag from data-hub to access submitted metadata for reporting
+* Add species override param (`speciesForce`) [`Mus musculus` or `Homo sapiens`]
+  * forces the use of the provided species
+  * ignores inferred ambiguous species
+
+**Background**
+* Add GeneSymbol/EnsemblID/EntrezID translation files to references
+
+*Known Bugs*
+* outputBag does not contain fetch for processed data
+* Does not include automatic data upload
+* Override params (inputBag, fastq, species) aren't checked for integrity
+
+<hr>
+
+# v0.0.2
 **User Facing**
 * Output:
   * inputBag
@@ -19,4 +47,4 @@
 **INITIAL BETA VERSION**\
 Does not include automatic data upload\
 This version is for initial upload of test data to GUDMAP/RBK data-hub for internal integration
-<hr>
+<hr>
\ No newline at end of file
diff --git a/README.md b/README.md
index 2216c2775a2145f8e021cb221b18414f38ed9304..49069c53ada7b1f8351684cea8ba161830369208 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,13 @@ To Run:
   * reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version)
     * *current mouse* **38.p6.vM22** = GRCm38.p6 with GENCODE annotation release M22
     * *current human* **38.p6.v31** = GRCh38.p12 with GENCODE annotation release 31
+* ***Optional*** input overrides
+  * `--inputBagForce` utilizes a local replicate inputBag instead of downloading from the data-hub (still requires accurate repRID input)
+    * eg: `--inputBagForce test_data/bag/Replicate_Q-Y5F6.zip` (must be the expected bag structure)
+  * `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
+    * eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
+  * `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses ambiguous species error
+    * eg: `--speciesForce 'Mus musculus'`
 * Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
   * `--ci` boolean (default = false)
   * `--dev` boolean (default = false)
diff --git a/docs/dag.png b/docs/dag.png
index 04aafa139a1715cdd8213ae7a81a25dfaf6b4dac..2a2ec56ccc0615d8b1c9c1f6ce73201c0073874d 100755
Binary files a/docs/dag.png and b/docs/dag.png differ
diff --git a/test_data/createTestData.sh b/test_data/createTestData.sh
index 47c73d46fba5510f64fe5f9bb5c540eb4ad618cc..aec4e91bb71d5bb79d87cd76994f6253cfe63ea3 100644
--- a/test_data/createTestData.sh
+++ b/test_data/createTestData.sh
@@ -9,12 +9,12 @@ mkdir -p NEW_test_data
 
 ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
 
-mkdir -p ./NEW_test_data/bagit
+mkdir -p ./NEW_test_data/bag
 singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
-cp Replicate_Q-Y5F6.zip ./NEW_test_data/bagit/Replicate_Q-Y5F6.zip
+cp Replicate_Q-Y5F6.zip ./NEW_test_data/bag/Replicate_Q-Y5F6.zip
 
 mkdir -p ./NEW_test_data/fastq
-unzip ./test_data/bagit/Replicate_Q-Y5F6.zip
+unzip ./test_data/bag/Replicate_Q-Y5F6.zip
 singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6
 cp Replicate_Q-Y5F6.R1.fastq.gz ./NEW_test_data/fastq/Replicate_Q-Y5F6.R1.fastq.gz
 cp Replicate_Q-Y5F6.R2.fastq.gz ./NEW_test_data/fastq/Replicate_Q-Y5F6.R2.fastq.gz
@@ -81,10 +81,14 @@ cp Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.
 
 mkdir -p ./NEW_test_data/counts
 mkdir -p ./NEW_test_data/counts/small
-singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam 
-singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5F6_1M.se.featureCounts
-cp Q-Y5F6_1M.se.featureCounts ./NEW_test_data/counts/small/Q-Y5F6_1M.se.featureCounts
+ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
+ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
+singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam 
+singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5F6_1M.se.countData
+singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
+cp Q-Y5F6_1M.se.countData ./NEW_test_data/counts/small/Q-Y5F6_1M.se.countData
 cp Q-Y5F6_1M.se.countTable.csv ./NEW_test_data/counts/small/Q-Y5F6_1M.se.countTable.csv
+cp Q-Y5F6_1M.se.tpmTable.csv ./NEW_test_data/counts/small/Q-Y5F6_1M.se.tpmTable.csv
 
 mkdir -p ./NEW_test_data/bw
 mkdir -p ./NEW_test_data/bw/small
diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml
index db6a335a8e5e6f8677dd8c433d391c6fd5224751..0c780d967f2859c1c76e02a63f3348c26049c694 100644
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -48,14 +48,32 @@ top_modules:
         - '*infer_experiment*'
 
 report_section_order:
+    run:
+      order: 4000
     rid:
-      order: 2000
+      order: 3000
     meta:
+      order: 2000
+    ref:
       order: 1000
 
 skip_generalstats: true
 
 custom_data:
+    run:
+        file_format: 'tsv'
+        section_name: 'Run'
+        description: 'This is the run information'
+        plot_type: 'table'
+        pconfig:
+            id: 'run'
+            scale: false
+            format: '{}'
+        headers:
+            Session
+            Session ID
+            Pipeline Version
+            Input
     rid:
         file_format: 'tsv'
         section_name: 'RID'
@@ -63,7 +81,10 @@ custom_data:
         plot_type: 'table'
         pconfig:
             id: 'rid'
+            scale: false
+            format: '{}'
         headers:
+            Replicate
             Replicate RID
             Experiment RID
             Study RID
@@ -74,6 +95,7 @@ custom_data:
         plot_type: 'table'
         pconfig:
             id: 'meta'
+            scale: false
             format: '{:,.0f}'
         headers:
             Source
@@ -85,6 +107,21 @@ custom_data:
             Assigned Reads
             Median Read Length
             Median TIN
+            Pipeline Version
+    ref:
+        file_format: 'tsv'
+        section_name: 'Reference'
+        description: 'This is the reference version information'
+        plot_type: 'table'
+        pconfig:
+            id: 'ref'
+            scale: false
+            format: '{}'
+        headers:
+            Species
+            Genome Reference Consortium Build
+            Genome Reference Consortium Patch
+            GENCODE Annotation Release
     tin:
         file_format: 'tsv'
         section_name: 'TIN'
@@ -106,9 +143,13 @@ custom_data:
             90 - 99
 
 sp:
+    run:
+        fn: "run.tsv"
     rid:
         fn: 'rid.tsv'
     meta:
         fn: 'metadata.tsv'
+    ref:
+        fn: 'reference.tsv'
     tin:
-        fn: '*.tin.hist.tsv'
+        fn: '*.tin.hist.tsv'
\ No newline at end of file
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index 660ec331080a9235d0e7eddc630a48500ead61f3..fd6f6f02be0aa82e19fa3e704cc3d609eca8bd99 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -97,6 +97,6 @@ manifest {
   homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
   description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
   mainScript = 'rna-seq.nf'
-  version = 'v0.0.2_indev'
+  version = 'v0.0.3_indev'
   nextflowVersion = '>=19.09.0'
 }
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index a311b9e876249c0d4cfbbb1cd13e7d94b1902807..b6687df5d29f1d1b60d017c3d42ebaebf4edbcd0 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -19,6 +19,11 @@ params.refHuVersion = "38.p12.v31"
 params.refERCCVersion = "92"
 params.outDir = "${baseDir}/../output"
 
+// Define override input variable
+params.inputBagForce = ""
+params.fastqsForce = ""
+params.speciesForce = ""
+
 // Parse input variables
 deriva = Channel
   .fromPath(params.deriva)
@@ -32,6 +37,9 @@ refHuVersion = params.refHuVersion
 refERCCVersion = params.refERCCVersion
 outDir = params.outDir
 logsDir = "${outDir}/Logs"
+inputBagForce = params.inputBagForce
+fastqsForce = params.fastqsForce
+speciesForce = params.speciesForce
 
 // Define fixed files
 derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
@@ -117,7 +125,10 @@ process getBag {
     path derivaConfig
 
   output:
-    path ("Replicate_*.zip") into bagit
+    path ("Replicate_*.zip") into bag
+
+  when:
+    inputBagForce == ""
 
   script:
     """
@@ -131,12 +142,21 @@ process getBag {
     echo -e "LOG: linked" >> ${repRID}.getBag.log
 
     # deriva-download replicate RID
-    echo -e "LOG: fetching bagit for ${repRID} in GUDMAP" >> ${repRID}.getBag.log
+    echo -e "LOG: fetching bag for ${repRID} in GUDMAP" >> ${repRID}.getBag.log
     deriva-download-cli ${source} --catalog 2 ${derivaConfig} . rid=${repRID}
     echo -e "LOG: fetched" >> ${repRID}.getBag.log
     """
 }
 
+// Set inputBag to downloaded or forced input
+if (inputBagForce != "") {
+  inputBag = Channel
+    .fromPath(inputBagForce)
+    .ifEmpty { exit 1, "override inputBag file not found: ${inputBagForce}" }
+} else {
+  inputBag = bag
+}
+
 /*
  * getData: fetch study files from consortium with downloaded bdbag.zip
  */
@@ -146,7 +166,7 @@ process getData {
   input:
     path script_bdbagFetch
     path cookies, stageAs: "deriva-cookies.txt" from bdbag
-    path bagit
+    path inputBag
 
   output:
     path ("*.R{1,2}.fastq.gz") into fastqs
@@ -158,33 +178,43 @@ process getData {
     """
     hostname > ${repRID}.getData.log
     ulimit -a >> ${repRID}.getData.log
-    
+
     # link deriva cookie for authentication
     echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log
     mkdir -p ~/.bdbag
     ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt
     echo -e "LOG: linked" >> ${repRID}.getData.log
     
-    # get bagit basename
-    replicate=\$(basename "${bagit}" | cut -d "." -f1)
-    echo -e "LOG: bagit replicate name \${replicate}" >> ${repRID}.getData.log
+    # get bag basename
+    replicate=\$(basename "${inputBag}" | cut -d "." -f1)
+    echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log
     
-    # unzip bagit
-    echo -e "LOG: unzipping replicate bagit" >> ${repRID}.getData.log
-    unzip ${bagit}
+    # unzip bag
+    echo -e "LOG: unzipping replicate bag" >> ${repRID}.getData.log
+    unzip ${inputBag}
     echo -e "LOG: unzipped" >> ${repRID}.getData.log
     
-    # bagit fetch fastq's only and rename by repRID
+    # bag fetch fastq's only and rename by repRID
     echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
     sh ${script_bdbagFetch} \${replicate} ${repRID}
     echo -e "LOG: fetched" >> ${repRID}.getData.log
     """
 }
 
-// Replicate raw fastq's for multiple process inputs
-fastqs.into {
-  fastqs_trimData
-  fastqs_fastqc
+// Set raw fastq to downloaded or forced input and replicate them for multiple process inputs
+if (fastqsForce != "") {
+  Channel
+    .fromPath(fastqsForce)
+    .ifEmpty { exit 1, "override fastq file(s) not found: ${fastqsForce}" }
+    .collect().into {
+      fastqs_trimData
+      fastqs_fastqc
+    }
+} else {
+  fastqs.into {
+    fastqs_trimData
+    fastqs_fastqc
+  }
 }
 
 /*
@@ -533,7 +563,24 @@ process inferMetadata {
       bed="./GRCm/bed/genome.bed"
     else
       echo -e "LOG: ERROR - inference of species returns an ambiguous result: hu=\${align_hu} mo=\${align_mo}" >> ${repRID}.inferMetadata.log
-      exit 1
+      if [ "${speciesForce}" == "" ]
+      then
+        exit 1
+      fi
+    fi
+    if [ "${speciesForce}" != "" ]
+    then
+      echo -e "LOG: species overridden to: ${speciesForce}"
+      species="${speciesForce}"
+      if [ "${speciesForce}" == "Homo sapiens" ]
+      then
+        bam="GRCh.sampled.sorted.bam"
+        bed="./GRCh/bed/genome.bed"
+      elif [ "${speciesForce}" == "Mus musculus" ]
+      then
+        bam="GRCm.sampled.sorted.bam"
+        bed="./GRCm/bed/genome.bed"
+      fi
     fi
     echo -e "LOG: inference of species results in: \${species}" >> ${repRID}.inferMetadata.log
 
@@ -875,10 +922,10 @@ process countData {
     echo -e "LOG: counting ${ends} features" >> ${repRID}.countData.log
     if [ "${ends}" == "se" ]
     then
-      featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
+      featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
     elif [ "${ends}" == "pe" ]
     then
-      featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
+      featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
     fi
     echo -e "LOG: counted" >> ${repRID}.countData.log
     
@@ -1034,18 +1081,53 @@ process aggrQC {
     hostname > ${repRID}.aggrQC.log
     ulimit -a >> ${repRID}.aggrQC.log
 
+    # make run table
+    if [ "${params.inputBagForce}" == "" ] && [ "${params.fastqsForce}" == "" ] && [ "${params.speciesForce}" == "" ]
+    then
+      input="default"
+    else
+      input="override:"
+      if [ "${params.inputBagForce}" != "" ]
+      then
+        input=\$(echo \${input} inputBag)
+      fi
+      if [ "${params.fastqsForce}" != "" ]
+      then
+        input=\$(echo \${input} fastq)
+      fi
+      if [ "${params.speciesForce}" != "" ]
+      then
+        input=\$(echo \${input} species)
+      fi
+    fi
+    echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log
+    echo -e "Session\tSession ID\tPipeline Version\tInput" > run.tsv
+    echo -e "Session\t${workflow.sessionId}\t${workflow.manifest.version}\t\${input}" >> run.tsv
+    
+    
     # make RID table
     echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log
-    echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv
-    echo -e "${repRID}\t${expRID}\t${studyRID}" >> rid.tsv
+    echo -e "Replicate\tReplicate RID\tExperiment RID\tStudy RID" > rid.tsv
+    echo -e "Replicate\t${repRID}\t${expRID}\t${studyRID}" >> rid.tsv
 
     # make metadata table
     echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
     echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRaw Reads\tAssigned Reads\tMedian Read Length\tMedian TIN" > metadata.tsv
     echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t'${readLengthM}'\t-" >> metadata.tsv
-    echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+    if [ "${params.speciesForce}" == "" ]
+    then
+      echo -e "Inferred\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+    else
+      echo -e "Inferred\t${speciesI} (FORCED)\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv
+    fi
     echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv
 
+    # make reference table
+    echo -e "LOG: creating reference table" >> ${repRID}.aggrQC.log
+    echo -e "Species\tGenome Reference Consortium Build\tGenome Reference Consortium Patch\tGENCODE Annotation Release" > reference.tsv
+    echo -e "Human\tGRCh\$(echo `echo ${params.refHuVersion} | cut -d "." -f 1`)\t\$(echo `echo ${params.refHuVersion} | cut -d "." -f 2`)\t'\$(echo `echo ${params.refHuVersion} | cut -d "." -f 3 | sed "s/^v//"`)'" >> reference.tsv
+    echo -e "Mouse\tGRCm\$(echo `echo ${params.refMoVersion} | cut -d "." -f 1`)\t\$(echo `echo ${params.refMoVersion} | cut -d "." -f 2`)\t'\$(echo `echo ${params.refMoVersion} | cut -d "." -f 3 | sed "s/^v//"`)'" >> reference.tsv
+
     # remove inner distance report if it is empty (SE repRID)
     echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
     if [ "${endsM}" == "se" ]
@@ -1081,5 +1163,4 @@ process outputBag {
   cp ${multiqcJSON} Replicate_${repRID}.outputBag
   bdbag Replicate_${repRID}.outputBag --archiver zip
   """
-}
-
+}
\ No newline at end of file
diff --git a/workflow/scripts/calculateTPM.R b/workflow/scripts/calculateTPM.R
index d4851d4806c5e12e06416a103f2e6972a8b4befe..a26bf943dce7d082ed03b77939390abb022bba82 100644
--- a/workflow/scripts/calculateTPM.R
+++ b/workflow/scripts/calculateTPM.R
@@ -13,10 +13,13 @@ if (!("count" %in% names(opt))){
   stop("Count file doesn't exist, exiting.")
 }
 
-repRID <- basename(gsub(".featureCounts","",opt$count))
+repRID <- basename(gsub(".countData","",opt$count))
 
 count <- read.delim(opt$count, comment.char="#") # if featureCounts file changes structure, be sure to update count and Length columns below
-colnames(count)[7] <- "count"
+colnames(count)[1] <- "gene_name"
+colnames(count)[7] <- "gene_id"
+colnames(count)[8] <- "count"
+count <- count[,c(1,7,2:6,8)]
 
 rpk <- count$count/count$Length/1000
 
diff --git a/workflow/scripts/convertGeneSymbols.R b/workflow/scripts/convertGeneSymbols.R
index 92d3c3e2e8ad97ebacbca3c5f9d0fc185ceafb7e..49752f1bba5a4dd8d91ed8609c6f2f82b8fafacc 100644
--- a/workflow/scripts/convertGeneSymbols.R
+++ b/workflow/scripts/convertGeneSymbols.R
@@ -7,18 +7,20 @@ option_list=list(
 opt=parse_args(OptionParser(option_list=option_list))
 rm(option_list)
 
-countTable <- read.csv(paste0(opt$repRID,".countData.countTable.csv"), stringsAsFactors=FALSE)
+countTable <- read.csv(paste0(opt$repRID,".countTable.csv"), stringsAsFactors=FALSE)
 geneID <- read.delim("geneID.tsv", header=FALSE, stringsAsFactors=FALSE)
 Entrez <- read.delim("Entrez.tsv", header=FALSE, stringsAsFactors=FALSE)
 
-convert <- data.frame(geneID=countTable$Geneid)
-convert <- merge(x=convert,y=geneID[,1:2],by.x="geneID",by.y="V2",all.x=TRUE)
+convert <- data.frame(gene_name=countTable$gene_name)
+convert <- merge(x=convert,y=geneID[,1:2],by.x="gene_name",by.y="V2",all.x=TRUE)
 convert <- merge(x=convert,y=Entrez,by.x="V1",by.y="V1",all.x=TRUE)
 convert[is.na(convert$V2),3] <- ""
 convert <- convert[,-1]
 colnames(convert) <- c("GeneID","EntrezID")
 convert <- unique(convert)
 
-output <- merge(x=convert,y=countTable[,c("Geneid","count","tpm")],by.x="GeneID",by.y="Geneid",all.x=TRUE)
+output <- merge(x=convert,y=countTable[,c("gene_name","gene_id","count","tpm")],by.x="GeneID",by.y="gene_name",all.x=TRUE)
+colnames(output) <- c("GENCODE_Gene_Symbol","NCBI_GeneID","Ensembl_GeneID","count","tpm")
+output <- output[,c(1,3,2,4:5)]
 
 write.table(output,file=paste0(opt$repRID,".tpmTable.csv"),sep=",",row.names=FALSE,quote=FALSE)
diff --git a/workflow/tests/test_makeFeatureCounts.py b/workflow/tests/test_makeFeatureCounts.py
index d741a2f4ac0f33fe15af8f13300c95002a00a887..d33527a9c7bffcaa9919639b72842cc8cf63b14f 100644
--- a/workflow/tests/test_makeFeatureCounts.py
+++ b/workflow/tests/test_makeFeatureCounts.py
@@ -11,5 +11,6 @@ data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 
 @pytest.mark.makeFeatureCounts
 def test_makeFeatureCounts():
-	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5F6_1M.se.featureCounts'))
+	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5F6_1M.se.countData'))
 	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5F6_1M.se.countTable.csv'))
+	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5F6_1M.se.tpmTable.csv'))