diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 3d66ceb48acf6e3fde51ccfdeb9261654fa3d6f8..658a53c878fac2c68a06e42334d57c05a8a58873 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -32,9 +32,21 @@ parseMetadata:
   - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID
   - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p ends
   - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual
-  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded -e se
+  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded
   - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species
 
+inferMetadata:
+  stage: unit
+  script:
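+  # fail the job early if an overall alignment rate cannot be parsed from the alignment summary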
+  - >
+    align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5JA_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
+    if [[ ${align} == "" ]]; then exit 1; fi
+  - >
+    singularity run 'docker://bicf/rseqc3.0:2.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam" 1>> Q-Y5JA_1M.se.inferMetadata.log &&
+    ended=`singularity run 'docker://bicf/python3:1.3' bash ./workflow/scripts/inferMeta.sh endness Q-Y5JA_1M.se.inferMetadata.log` &&
+    if [[ ${ended} == "" ]]; then exit 1; fi
+  - pytest -m inferMetadata
+
 getRef:
   stage: unit
   script:
@@ -50,14 +62,20 @@ trimData:
   - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5JA_1M.pe -j 20 ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5JA_1M.R2.fastq.gz
   - pytest -m trimData
 
+downsampleData:
+  stage: unit
+  script:
+  - singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz 1000 1> sampled.1.fq
+  - pytest -m downsampleData
+
 alignData:
   stage: unit
   script:
-  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz --summary-file ${repRID}.alignSummary.txt --new-summary
+  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz --summary-file Q-Y5JA_1M.se.alignSummary.txt --new-summary
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5JA_1M.se.bam Q-Y5JA_1M.se.sam
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.bam
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.sorted.bam.bai
-  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz --summary-file ${repRID}.alignSummary.txt --new-summary
+  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz --summary-file Q-Y5JA_1M.pe.alignSummary.txt --new-summary
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5JA_1M.pe.bam Q-Y5JA_1M.pe.sam
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.bam
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.sorted.bam.bai
@@ -69,36 +87,39 @@ dedupData:
   - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
   - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5JA_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5JA_1M.se.deduped.bam
   - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam Q-Y5JA_1M.se.sorted.deduped.bam.bai
-  - for i in {"chr8","chr4","chrY"}; do
+  - >
+    for i in {"chr8","chr4","chrY"}; do
     echo "samtools view -b Q-Y5JA_1M.se.sorted.deduped.bam ${i} > Q-Y5JA_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5JA_1M.se.sorted.deduped.${i}.bam Q-Y5JA_1M.se.sorted.deduped.${i}.bam.bai;"; done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
   - pytest -m dedupData
 
-makeBigWig:
-  stage: unit
-  script:
-  - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -o Q-Y5JA_1M.se.bw
-  - pytest -m makeBigWig
-makeFeatureCounts:
+countData:
   stage: unit
   script:
   - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -R SAM -p -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -T 20 -s 1 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -o Q-Y5JA_1M.se.featureCounts -g 'gene_name' --primary --ignoreDup -B ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam
   - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5JA_1M.se.featureCounts
   - pytest -m makeFeatureCounts
 
+makeBigWig:
+  stage: unit
+  script:
+  - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -o Q-Y5JA_1M.se.bw
+  - pytest -m makeBigWig
+
 fastqc:
   stage: unit
   script:
   - singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz -o .
   - pytest -m fastqc
 
-inferMetadata:
+dataQC:
   stage: unit
   script:
+  - echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5JA_1M.se.sorted.deduped.tin.xls
   - for i in {"chr8","chr4","chrY"}; do
-    echo " tin.py -i /project/BICF/BICF_Core/shared/gudmap/test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5JA_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.0' parallel -j 20 -k > Q-Y5JA_1M.se.sorted.deduped.tin.xls
-  - pytest -m inferMetadata
+    echo "tin.py -i /project/BICF/BICF_Core/shared/gudmap/test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5JA_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.0' parallel -j 20 -k >> Q-Y5JA_1M.se.sorted.deduped.tin.xls
+  - pytest -m dataQC
+
 integration_se:
   stage: integration
diff --git a/README.md b/README.md
index fbb700ef12c3d006870dd8378741af9c5a404ca1..e2a78488d3dc1f4e605f593253697c28588a2909 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ RNA-Seq Analytic Pipeline for GUDMAP/RBK
 Introduction
 ------------
 
-This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.
+This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub. It is designed to run on the HPC cluster ([BioHPC](https://portal.biohpc.swmed.edu)) at UT Southwestern Medical Center (in conjunction with the standard nextflow profile: config `biohpc.config`).
 
 
 
@@ -21,18 +21,19 @@ This pipeline is also capable of being run on AWS.
 To do so:
 * Replace workDir with the S3 bucket generated
 * Change region if different
 * Change queue to the aws batch queue generated
-* The user must have awscli configured with an appropriate authentication (with ```aws configure``` and access keys) in the environment which nextflow will be run
-* Add ```-profile ``` with the name aws config which was customized
+* The user must have awscli configured with an appropriate authentication (with `aws configure` and access keys) in the environment in which nextflow will be run
+* Add `-profile` with the name of the customized aws config
 
 To Run:
 -------
 * Available parameters:
-  * ```--deriva``` active **credential.json** file from [deriva-auth](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/Uploading-files-via-Deriva-client-tools#from-a-remote-server)
-  * ```--bdbag``` active **cookies.txt** file from [deriva-auth](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/Uploading-files-via-Deriva-client-tools#from-a-remote-server)
-  * ```--repRID``` mRNA-seq replicate RID
-  * ```--refMoVersion``` mouse reference version ***(optional)***
-  * ```--refHuVersion``` human reference version ***(optional)***
-  * ```-profile``` config profile to use: standard = local processes on BioHPC (default), biohpc = BioHPC cluster, aws_ondemand = AWS Batch on-demand instant requests, aws_spot = AWS Batch spot instance requests ***(optional)***
+  * `--deriva` active **credential.json** file from [deriva-auth](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/Uploading-files-via-Deriva-client-tools#from-a-remote-server)
+  * `--bdbag` active **cookies.txt** file from [deriva-auth](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/Uploading-files-via-Deriva-client-tools#from-a-remote-server)
+  * `--repRID` mRNA-seq replicate RID
+  * `--refMoVersion` mouse reference version ***(optional)***
+  * `--refHuVersion` human reference version ***(optional)***
+  * `--refERCCVersion` ERCC reference version ***(optional)***
+  * `-profile` config profile to use: standard = processes on BioHPC cluster, aws_ondemand = AWS Batch on-demand instant requests, aws_spot = AWS Batch spot instance requests ***(optional)***
 * NOTES:
   * once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials)
   * reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version)
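+
+A typical BioHPC invocation might look like the following (the replicate RID and credential paths are illustrative; substitute your own):
+```
+nextflow run workflow/rna-seq.nf --deriva ~/.deriva/credential.json --bdbag ~/.deriva/cookies.txt --repRID Q-Y5JA -profile biohpc
+```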
@@ -96,4 +97,4 @@ Please cite in publications: Pipeline was developed by BICF from funding provide
 
 Pipeline Directed Acyclic Graph
 -------------------------------
-![dag](docs/dag.png "DAG")
\ No newline at end of file
+![dag](docs/dag.png "DAG")
diff --git a/docs/dag.png b/docs/dag.png
index f9c379f904ad2bc32b628556036517e98e77a165..1fedc931294b459b15780cc408faf38949a83986 100644
Binary files a/docs/dag.png and b/docs/dag.png differ
diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config
index 0c8b6ff0fd242cda9ca2fb43473e82f99547efef..79c5fc6d9431c377e4ac3ed16fde26f2192ded56 100755
--- a/workflow/conf/aws_ondemand.config
+++ b/workflow/conf/aws_ondemand.config
@@ -1,4 +1,4 @@
-workDir = 's3://gudmap.rbk/work'
+workDir = 's3://'
 aws.client.storageEncryption = 'AES256'
 aws {
   region = 'us-east-2'
@@ -9,29 +9,7 @@ aws {
 process {
   executor = 'awsbatch'
-  queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
+  queue = 'highpriority-'
   cpus = 1
   memory = '2 GB'
-
-  withName:parseMetadata {
-    cpus = 5
-  }
-  withName:getRef {
-    cpus = 8
-  }
-  withName:trimData {
-    cpus = 8
-    memory = '2 GB'
-  }
-  withName:alignData {
-    cpus = 50
-    memory = '5 GB'
-  }
-  withName:dedupData {
-    cpus = 2
-    memory = '20 GB'
-  }
-  withName:fastqc {
-    memory = '5 GB'
-  }
-}
\ No newline at end of file
+}
diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config
index 4708298a63ced25cd74edf2646fc3c67a0a7753c..f1935697bb1c1c7e6aeedf310d5da2f38da1811c 100755
--- a/workflow/conf/aws_spot.config
+++ b/workflow/conf/aws_spot.config
@@ -1,4 +1,4 @@
-workDir = 's3://gudmap.rbk/work'
+workDir = 's3://'
 aws.client.storageEncryption = 'AES256'
 aws {
   region = 'us-east-2'
@@ -9,29 +9,7 @@ aws {
 process {
   executor = 'awsbatch'
-  queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
+  queue = 'default-'
   cpus = 1
   memory = '2 GB'
-
-  withName:parseMetadata {
-    cpus = 5
-  }
-  withName:getRef {
-    cpus = 8
-  }
-  withName:trimData {
-    cpus = 8
-    memory = '2 GB'
-  }
-  withName:alignData {
-    cpus = 50
-    memory = '5 GB'
-  }
-  withName:dedupData {
-    cpus = 2
-    memory = '20 GB'
-  }
-  withName:fastq {
-    memory = '5 GB'
-  }
 }
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index ed78dbe46c81d516caf2ffbdd19f024fc2dbe29d..9448731eec9d06e48bb0185188474734c38b1b00 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -3,34 +3,49 @@ process {
   queue = 'super'
   clusterOptions = '--hold'
 
-  withName:getBag {
+  withName: getBag {
     executor = 'local'
   }
-  withName:getData {
+  withName: getData {
     executor = 'local'
   }
-  withName:parseMetadata {
+  withName: parseMetadata {
    executor = 'local'
   }
-  withName:getRef {
+  withName: trimData {
+    queue = 'super'
+  }
+  withName: getRefInfer {
+    executor = 'local'
+  }
+  withName: downsampleData {
     executor = 'local'
   }
-  withName:trimData {
+  withName: alignSampleData {
+    queue = 'super'
+  }
+  withName: inferMetadata {
     queue = 'super'
   }
-  withName:alignData {
+  withName: getRef {
+    executor = 'local'
+  }
+  withName: alignData {
     queue = '256GB,256GBv1'
   }
   withName: dedupData {
     queue = 'super'
   }
-  withName:fastqc {
+  withName: countData {
+    queue = 'super'
+  }
+  withName: makeBigWig {
     queue = 'super'
   }
-  withName:inferMetadata {
+  withName: fastqc {
     queue = 'super'
   }
-  withName: makeBigWig {
+  withName: dataQC {
     queue = 'super'
   }
 }
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index f0be347ef28c9306abe43974d48464484f48b5c4..0888063278039f3e3e7e280735670cdb94852fd4 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -20,30 +20,42 @@ process {
   withName: parseMetadata {
     container = 'bicf/python3:1.3'
   }
-  withName: getRef {
-    container = 'bicf/awscli:1.1'
-  }
   withName: trimData {
     container = 'bicf/trimgalore:1.1'
   }
+  withName: getRefInfer {
+    container = 'bicf/awscli:1.1'
+  }
+  withName: downsampleData {
+    container = 'bicf/seqtk:2.0.0'
+  }
+  withName: alignSampleData {
+    container = 'bicf/gudmaprbkaligner:2.0.0'
+  }
+  withName: inferMetadata {
+    container = 'bicf/rseqc3.0:2.0.0'
+  }
+  withName: getRef {
+    container = 'bicf/awscli:1.1'
+  }
   withName: alignData {
     container = 'bicf/gudmaprbkaligner:2.0.0'
   }
   withName: dedupData {
     container = 'bicf/gudmaprbkdedup:2.0.0'
   }
+  withName: countData {
+    container = 'bicf/subread2:2.0.0'
+  }
   withName: makeBigWig {
     container = 'bicf/deeptools3.3:2.0.0'
   }
   withName: fastqc {
     container = 'bicf/fastqc:2.0.0'
   }
-  withName:inferMetadata{
+  withName: dataQC {
     container = 'bicf/rseqc3.0:2.0.0'
   }
-
-  withName: makeFeatureCounts {
-    container = 'bicf/subread2:2.0.0'
-  }
 }
 
 trace {
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index b082b15649cde7c8559edcbc8dd91fdb15ea00ef..615c68e9e902ffa3ebb9c6d934168050058aa56c 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -15,6 +15,7 @@ params.bdbag = "${baseDir}/../test_data/auth/cookies.txt"
 params.repRID = "Q-Y5JA"
 params.refMoVersion = "38.p6.vM22"
 params.refHuVersion = "38.p12.v31"
+params.refERCCVersion = "92"
 params.outDir = "${baseDir}/../output"
 
 // Parse input variables
@@ -27,6 +28,7 @@ bdbag = Channel
 repRID = params.repRID
 refMoVersion = params.refMoVersion
 refHuVersion = params.refHuVersion
+refERCCVersion = params.refERCCVersion
 outDir = params.outDir
 logsDir = "${outDir}/Logs"
 
@@ -34,6 +36,7 @@ logsDir = "${outDir}/Logs"
 derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
 //referenceBase = "s3://bicf-references"
 referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references"
+referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"])
 
 // Define script files
 script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
@@ -115,7 +118,7 @@ process getData {
   """
 }
 
-// Replicate raw fastqs for multiple process inputs
+// Replicate raw fastq's for multiple process inputs
 fastqs.into {
   fastqs_trimData
   fastqs_fastqc
@@ -130,7 +133,6 @@ process parseMetadata {
 
   input:
     path script_parseMeta
-    val repRID
     path fileMeta
     path experimentSettingsMeta
    path experimentMeta
@@ -157,7 +159,7 @@ process parseMetadata {
     echo "LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.err
 
     # Get strandedness metadata
-    stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p stranded -e \${endsManual})
+    stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p stranded)
     echo "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.err
 
     # Get spike-in metadata
@@ -169,52 +171,354 @@ process parseMetadata {
     echo "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.err
 
     # Save design file
-    echo "\${rep},\${endsMeta},\${endsManual},\${stranded},\${spike},\${species}" > design.csv
+    echo "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species}" > design.csv
     """
 }
 
 // Split metadata into separate channels
-rep = Channel.create()
 endsMeta = Channel.create()
 endsManual = Channel.create()
-stranded = Channel.create()
-spike = Channel.create()
-species = Channel.create()
+strandedMeta = Channel.create()
+spikeMeta = Channel.create()
+speciesMeta = Channel.create()
 metadata.splitCsv(sep: ",", header: false).separate(
-  rep,
   endsMeta,
   endsManual,
-  stranded,
-  spike,
-  species
+  strandedMeta,
+  spikeMeta,
+  speciesMeta
 )
 
 // Replicate metadata for multiple process inputs
 endsManual.into {
   endsManual_trimData
-  endsManual_alignData
-  endsManual_featureCounts
+  endsManual_downsampleData
+  endsManual_alignSampleData
 }
-stranded.into {
-  stranded_alignData
-  stranded_featureCounts
+
+
+/*
+ * trimData: trims any adapter or non-host sequences from the data
+*/
+process trimData {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.trimData.{out,err}"
+
+  input:
+    val ends from endsManual_trimData
+    path (fastq) from fastqs_trimData
+
+  output:
+    path ("*.fq.gz") into fastqsTrim
+    path ("*_trimming_report.txt") into trimQC
+    path ("${repRID}.trimData.{out,err}")
+
+  script:
+    """
+    hostname > ${repRID}.trimData.err
+    ulimit -a >> ${repRID}.trimData.err
+
+    #Trim fastq's using trim_galore
+    if [ "${ends}" == "se" ]
+    then
+      echo "LOG: running trim_galore using single-end settings" >> ${repRID}.trimData.err
+      trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err
+    elif [ "${ends}" == "pe" ]
+    then
+      echo "LOG: running trim_galore using paired-end settings" >> ${repRID}.trimData.err
+      trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err
+    fi
+    """
 }
-spike.into{
-  spike_getRef
+
+// Replicate trimmed fastq's
+fastqsTrim.into {
+  fastqsTrim_alignData
+  fastqsTrim_downsampleData
 }
-species.into {
-  species_getRef
+
+/*
+ * getRefInfer: Downloads appropriate reference for metadata inference
+*/
+process getRefInfer {
+  tag "${refName}"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.getRefInfer.{out,err}"
+
+  input:
+    val refName from referenceInfer
+
+  output:
+    tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf") into refInfer
+    path ("${refName}", type: 'dir') into bedInfer
+    path ("${repRID}.getRefInfer.{out,err}")
+
+  script:
+    """
+    hostname > ${repRID}.getRefInfer.err
+    ulimit -a >> ${repRID}.getRefInfer.err
+    export https_proxy=\${http_proxy}
+
+    #Set the reference name
+    if [ "${refName}" == "ERCC" ]
+    then
+      references=\$(echo ${referenceBase}/ERCC${refERCCVersion})
+    elif [ "${refName}" == "GRCm" ]
+    then
+      references=\$(echo ${referenceBase}/GRCm${refMoVersion})
+    elif [ '${refName}' == "GRCh" ]
+    then
+      references=\$(echo ${referenceBase}/GRCh${refHuVersion})
+    else
+      echo -e "LOG: ERROR - References could not be set!\nReference found: ${referenceBase}" >> ${repRID}.getRefInfer.err
+      exit 1
+    fi
+    mkdir ${refName}
+
+    #Retrieve reference files from the appropriate location
+    if [ ${referenceBase} == "s3://bicf-references" ]
+    then
+      echo "LOG: grabbing reference files from S3" >> ${repRID}.getRefInfer.err
+      aws s3 cp "\${references}"/hisat2 ./ --recursive 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+      aws s3 cp "\${references}"/bed ./${refName}/ --recursive 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+      aws s3 cp "\${references}"/*.fna --recursive 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+      aws s3 cp "\${references}"/*.gtf --recursive 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+    elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
+    then
+      echo "LOG: using pre-defined locations for reference files" >> ${repRID}.getRefInfer.err
+      ln -s "\${references}"/hisat2 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+      ln -s "\${references}"/bed ${refName}/bed 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+      ln -s "\${references}"/genome.fna 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+      ln -s "\${references}"/genome.gtf 1>> ${repRID}.getRefInfer.out 2>> ${repRID}.getRefInfer.err
+    fi
+
+    #Make blank bed folder for ERCC
+    if [ "${refName}" == "ERCC" ]
+    then
+      rm ${refName}/bed
+      mkdir ${refName}/bed
+    fi
+    """
+}
+
+/*
+ * downsampleData: downsample fastq's for metadata inference
+ */
+process downsampleData {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.downsampleData.{out,err}"
+
+  input:
+    val ends from endsManual_downsampleData
+    path fastq from fastqsTrim_downsampleData
+
+  output:
+    path ("sampled.1.fq") into fastqs1Sample
+    path ("sampled.2.fq") optional true into fastqs2Sample
+    path ("${repRID}.downsampleData.{out,err}")
+
+  script:
+    """
+    hostname > ${repRID}.downsampleData.err
+    ulimit -a >> ${repRID}.downsampleData.err
+    export https_proxy=\${http_proxy}
+
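+    # use the same fixed seed (-s100) for both mates so paired reads stay in sync after sampling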
+    if [ "${ends}" == "se" ]
+    then
+      echo "LOG: downsampling single-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *trimmed.fq.gz 100000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
+    elif [ "${ends}" == "pe" ]
+    then
+      echo "LOG: downsampling read 1 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *1.fq.gz 1000000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
+      echo "LOG: downsampling read 2 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *2.fq.gz 1000000 1> sampled.2.fq 2>> ${repRID}.downsampleData.err
+    fi
+    """
+}
+
+// Replicate the downsampled fastq's and attach them to the references
+inferInput = endsManual_alignSampleData.combine(refInfer.combine(fastqs1Sample.collect().combine(fastqs2Sample.collect())))
+
+/*
+ * alignSampleData: aligns the downsampled reads to a reference database
+*/
+process alignSampleData {
+  tag "${ref}"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.${ref}.alignSampleData.{out,err}"
+
+  input:
+    tuple val (ends), val (ref), path (hisat2), path (fna), path (gtf), path (fastq1), path (fastq2) from inferInput
+
+  output:
+    path ("${ref}.sampled.sorted.bam") into sampleBam
+    path ("${ref}.sampled.sorted.bam.bai") into sampleBai
+    path ("${ref}.alignSampleSummary.txt") into alignSampleQC
+    path ("${repRID}.${ref}.alignSampleData.{out,err}")
+
+  script:
+    """
+    hostname > ${repRID}.${ref}.alignSampleData.err
+    ulimit -a >> ${repRID}.${ref}.alignSampleData.err
+
+    #Align the reads with Hisat 2
+    if [ "${ends}" == "se" ]
+    then
+      echo "LOG: running Hisat2 with single-end settings" >> ${repRID}.${ref}.alignSampleData.err
+      hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary 1>> ${repRID}.${ref}.alignSampleData.out 2>> ${repRID}.${ref}.alignSampleData.err
+    elif [ "${ends}" == "pe" ]
+    then
+      echo "LOG: running Hisat2 with paired-end settings" >> ${repRID}.${ref}.alignSampleData.err
+      hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary 1>> ${repRID}.${ref}.alignSampleData.out 2>> ${repRID}.${ref}.alignSampleData.err
+    fi
+
+    #Convert the output sam file to a sorted bam file using Samtools
+    echo "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.err
+    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam 1>> ${repRID}.${ref}.alignSampleData.out 2>> ${repRID}.${ref}.alignSampleData.err;
+
+    #Sort the bam file using Samtools
+    echo "LOG: sorting the bam file" >> ${repRID}.${ref}.alignSampleData.err
+    samtools sort -@ `nproc` -O BAM -o ${ref}.sampled.sorted.bam ${ref}.sampled.bam 1>> ${repRID}.${ref}.alignSampleData.out 2>> ${repRID}.${ref}.alignSampleData.err;
+
+    #Index the sorted bam using Samtools
+    echo "LOG: indexing sorted bam file" >> ${repRID}.${ref}.alignSampleData.err
+    samtools index -@ `nproc` -b ${ref}.sampled.sorted.bam ${ref}.sampled.sorted.bam.bai 1>> ${repRID}.${ref}.alignSampleData.out 2>> ${repRID}.${ref}.alignSampleData.err;
+    """
+}
+
+process inferMetadata {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.inferMetadata.{out,err}"
+
+  input:
+    path script_inferMeta
+    path beds from bedInfer.collect()
+    path bam from sampleBam.collect()
+    path bai from sampleBai.collect()
+    path alignSummary from alignSampleQC.collect()
+
+  output:
+    path "infer.csv" into inferMetadata
+    path "${repRID}.inferMetadata.{out,err}" optional true
+
+  script:
+    """
+    hostname > ${repRID}.inferMetadata.err
+    ulimit -a >> ${repRID}.inferMetadata.err
+
+    # collect alignment rates
+    align_ercc=\$(echo \$(grep "Overall alignment rate" ERCC.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
+    align_hu=\$(echo \$(grep "Overall alignment rate" GRCh.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
+    align_mo=\$(echo \$(grep "Overall alignment rate" GRCm.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
+
+    # determine spike-in
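+    # the spike-in is called present when the ERCC overall alignment rate is at least 0.1%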
"${repRID}.inferMetadata.{out,err}" + + input: + path script_inferMeta + path beds from bedInfer.collect() + path bam from sampleBam.collect() + path bai from sampleBai.collect() + path alignSummary from alignSampleQC.collect() + + output: + path "infer.csv" into inferMetadata + path "${repRID}.inferMetadata.{out,err}" optional true + + script: + """ + hostname > ${repRID}.inferMetadata.err + ulimit -a >> ${repRID}.inferMetadata.err + + # collect alignment rates + align_ercc=\$(echo \$(grep "Overall alignment rate" ERCC.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) + align_hu=\$(echo \$(grep "Overall alignment rate" GRCh.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) + align_mo=\$(echo \$(grep "Overall alignment rate" GRCm.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) + + # determine spike-in + if [ 1 -eq \$(echo \$(expr \${align_ercc} ">=" 0.1)) ] + then + spike="yes" + else + spike="no" + fi + echo -e "LOG: Inference of strandedness results in: \${spike}" >> ${repRID}.inferMetadata.err + + # determine species + if [ 1 -eq \$(echo \$(expr \${align_hu} ">=" 25)) ] && [ 1 -eq \$(echo \$(expr \${align_mo} "<" 25)) ] + then + species="Homo sapiens" + bam="GRCh.sampled.sorted.bam" + bed="./GRCh/bed/genome.bed" + elif [ 1 -eq \$(echo \$(expr \${align_mo} ">=" 25)) ] && [ 1 -eq \$(echo \$(expr \${align_hu} "<" 25)) ] + then + species="Mus musculus" + bam="GRCm.sampled.sorted.bam" + bed="./GRCm/bed/genome.bed" + else + echo "LOG: ERROR - Inference of species returns an ambiguous result" >> ${repRID}.inferMetadata.err + exit 1 + fi + echo -e "LOG: Inference of species results in: \${species}" >> ${repRID}.inferMetadata.err + + # infer experimental setting from dedup bam + echo "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.err + infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.inferMetadata.log 2>> ${repRID}.inferMetadata.err + + echo "LOG: determining endedness and strandedness from file" >> ${repRID}.inferMetadata.err + ended=`bash inferMeta.sh endness ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err + fail=`bash inferMeta.sh fail ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err + if [ \${ended} == "PairEnd" ] + then + ends="pe" + percentF=`bash inferMeta.sh pef ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err + percentR=`bash inferMeta.sh per ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err + elif [ \${ended} == "SingleEnd" ] + then + ends="se" + percentF=`bash inferMeta.sh sef ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err + percentR=`bash inferMeta.sh ser ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err + fi + if [ 1 -eq \$(echo \$(expr \${percentF} ">" 0.25)) ] && [ 1 -eq \$(echo \$(expr \${percentR} "<" 0.25)) ] + then + stranded="forward" + elif [ 1 -eq \$(echo \$(expr \${percentR} ">" 0.25)) ] && [ 1 -eq \$(echo \$(expr \${percentF} "<" 0.25)) ] + then + stranded="reverse" + + else + stranded="unstranded" + fi + echo -e "LOG: stradedness set to \${stranded}" >> ${repRID}.inferMetadata.err + + # write infered metadata to file + echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" 1>> infer.csv 2>> ${repRID}.inferMetadata.err + """ +} + +// 
+    if [ 1 -eq \$(echo \$(expr \${align_hu} ">=" 25)) ] && [ 1 -eq \$(echo \$(expr \${align_mo} "<" 25)) ]
+    then
+      species="Homo sapiens"
+      bam="GRCh.sampled.sorted.bam"
+      bed="./GRCh/bed/genome.bed"
+    elif [ 1 -eq \$(echo \$(expr \${align_mo} ">=" 25)) ] && [ 1 -eq \$(echo \$(expr \${align_hu} "<" 25)) ]
+    then
+      species="Mus musculus"
+      bam="GRCm.sampled.sorted.bam"
+      bed="./GRCm/bed/genome.bed"
+    else
+      echo "LOG: ERROR - Inference of species returns an ambiguous result" >> ${repRID}.inferMetadata.err
+      exit 1
+    fi
+    echo -e "LOG: Inference of species results in: \${species}" >> ${repRID}.inferMetadata.err
+
+    # infer experimental setting from dedup bam
+    echo "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.err
+    infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.inferMetadata.log 2>> ${repRID}.inferMetadata.err
+
+    echo "LOG: determining endedness and strandedness from file" >> ${repRID}.inferMetadata.err
+    ended=`bash inferMeta.sh endness ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
+    fail=`bash inferMeta.sh fail ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
+    if [ \${ended} == "PairEnd" ]
+    then
+      ends="pe"
+      percentF=`bash inferMeta.sh pef ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
+      percentR=`bash inferMeta.sh per ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
+    elif [ \${ended} == "SingleEnd" ]
+    then
+      ends="se"
+      percentF=`bash inferMeta.sh sef ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
+      percentR=`bash inferMeta.sh ser ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
+    fi
+    if [ 1 -eq \$(echo \$(expr \${percentF} ">" 0.25)) ] && [ 1 -eq \$(echo \$(expr \${percentR} "<" 0.25)) ]
+    then
+      stranded="forward"
+    elif [ 1 -eq \$(echo \$(expr \${percentR} ">" 0.25)) ] && [ 1 -eq \$(echo \$(expr \${percentF} "<" 0.25)) ]
+    then
+      stranded="reverse"
+    else
+      stranded="unstranded"
+    fi
+    echo -e "LOG: strandedness set to \${stranded}" >> ${repRID}.inferMetadata.err
+
+    # write inferred metadata to file
+    echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" 1>> infer.csv 2>> ${repRID}.inferMetadata.err
+    """
+}
+
+// Split metadata into separate channels
+endsInfer = Channel.create()
+strandedInfer = Channel.create()
+spikeInfer = Channel.create()
+speciesInfer = Channel.create()
+align_erccInfer = Channel.create()
+align_huInfer = Channel.create()
+align_moInfer = Channel.create()
+percentFInfer = Channel.create()
+percentRInfer = Channel.create()
+failInfer = Channel.create()
+inferMetadata.splitCsv(sep: ",", header: false).separate(
+  endsInfer,
+  strandedInfer,
+  spikeInfer,
+  speciesInfer,
+  align_erccInfer,
+  align_huInfer,
+  align_moInfer,
+  percentFInfer,
+  percentRInfer,
+  failInfer
+)
+
+// Replicate metadata for multiple process inputs
+endsInfer.into {
+  endsInfer_alignData
+  endsInfer_countData
+}
+strandedInfer.into {
+  strandedInfer_alignData
+  strandedInfer_countData
+}
+spikeInfer.into{
+  spikeInfer_getRef
+}
+speciesInfer.into {
+  speciesInfer_getRef
+}
+
+
 /*
  * getRef: Downloads appropriate reference
 */
 process getRef {
-  tag "${species_getRef}"
+  tag "${species}"
   publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.getRef.{out,err}"
 
   input:
-    val spike_getRef
-    val species_getRef
+    val spike from spikeInfer_getRef
+    val species from speciesInfer_getRef
 
   output:
     tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf") into reference
@@ -226,27 +530,27 @@ process getRef {
     ulimit -a >> ${repRID}.getRef.err
     export https_proxy=\${http_proxy}
 
-    # run set the reference name
-    if [ "${species_getRef}" == "Mus musculus" ]
+    #Set the reference name
+    if [ "${species}" == "Mus musculus" ]
     then
       references=\$(echo ${referenceBase}/GRCm${refMoVersion})
-    elif [ '${species_getRef}' == "Homo sapiens" ]
+    elif [ '${species}' == "Homo sapiens" ]
     then
       references=\$(echo ${referenceBase}/GRCh${refHuVersion})
     else
-      echo -e "LOG: ERROR - References could not be set!\nSpecies reference found: ${species_getRef}" >> ${repRID}.getRef.err
+      echo -e "LOG: ERROR - References could not be set!\nSpecies reference found: ${species}" >> ${repRID}.getRef.err
       exit 1
     fi
-    if [ "${spike_getRef}" == "yes" ]
+    if [ "${spike}" == "yes" ]
     then
       references=\$(echo \${references}-S/)
-    elif [ "${spike_getRef}" == "no" ]
+    elif [ "${spike}" == "no" ]
     then
       references=\$(echo \${references}/)
     fi
     echo "LOG: species set to \${references}" >> ${repRID}.getRef.err
 
-    # retreive appropriate reference appropriate location
+    #Retrieve reference files from the appropriate location
     if [ ${referenceBase} == "s3://bicf-references" ]
     then
       echo "LOG: grabbing reference files from S3" >> ${repRID}.getRef.err
@@ -268,42 +572,8 @@ process getRef {
 // Replicate reference for multiple process inputs
 reference.into {
   reference_alignData
-  reference_makeFeatureCounts
-  reference_inferMeta
-}
-
-/*
- * trimData: trims any adapter or non-host sequences from the data
-*/
-process trimData {
-  tag "${repRID}"
-  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.trimData.{out,err}"
-
-  input:
-    val endsManual_trimData
-    path (fastq) from fastqs_trimData
-
-  output:
-    path ("*.fq.gz") into fastqs_trimmed
-    path ("*_trimming_report.txt") into trimQC
-    path ("${repRID}.trimData.{out,err}")
-
-  script:
-    """
-    hostname > ${repRID}.trimData.err
-    ulimit -a >> ${repRID}.trimData.err
-
-    #Trim fastqs using trim_galore
-    if [ "${endsManual_trimData}" == "se" ]
-    then
-      echo "LOG: running trim_galore using single-end settings" >> ${repRID}.trimData.err
-      trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err
-    elif [ "${endsManual_trimData}" == "pe" ]
-    then
-      echo "LOG: running trim_galore using paired-end settings" >> ${repRID}.trimData.err
-      trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err
-    fi
-    """
+  reference_countData
+  reference_dataQC
 }
 
 /*
@@ -314,9 +584,9 @@ process alignData {
   publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.align.{out,err}"
 
   input:
-    val endsManual_alignData
-    val stranded_alignData
-    path fastq from fastqs_trimmed
+    val ends from endsInfer_alignData
+    val stranded from strandedInfer_alignData
+    path fastq from fastqsTrim_alignData
     path reference_alignData
 
   output:
@@ -329,15 +599,33 @@ process alignData {
     hostname > ${repRID}.align.err
     ulimit -a >> ${repRID}.align.err
 
+    #Set stranded param for hisat2
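+    # hisat2 --rna-strandness takes F/R for single-end and FR/RF for paired-end reads; unstranded data omits the flag entirely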
"${endsManual_trimData}" == "pe" ] - then - echo "LOG: running trim_galore using paired-end settings" >> ${repRID}.trimData.err - trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err - fi - """ + reference_countData + reference_dataQC } /* @@ -314,9 +584,9 @@ process alignData { publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.align.{out,err}" input: - val endsManual_alignData - val stranded_alignData - path fastq from fastqs_trimmed + val ends from endsInfer_alignData + val stranded from strandedInfer_alignData + path fastq from fastqsTrim_alignData path reference_alignData output: @@ -329,15 +599,33 @@ process alignData { hostname > ${repRID}.align.err ulimit -a >> ${repRID}.align.err + #Set stranded param for hisat2 + if [ "${stranded}"=="unstranded" ] + then + strandedParam="" + elif [ "${stranded}" == "forward" ] && [ "${ends}" == "se" ] + then + strandedParam="--rna-strandness F" + elif [ "${stranded}" == "forward" ] && [ "${ends}" == "pe" ] + then + strandedParam="--rna-strandness FR" + elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "se" ] + then + strandedParam="--rna-strandness R" + elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "pe" ] + then + strandedParam="--rna-strandness RF" + fi + #Align the reads with Hisat 2 - if [ "${endsManual_alignData}" == "se" ] + if [ "${ends}" == "se" ] then echo "LOG: running Hisat2 with single-end settings" >> ${repRID}.align.err - hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>> ${repRID}.align.out 2>> ${repRID}.align.err - elif [ "${endsManual_alignData}" == "pe" ] + hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>> ${repRID}.align.out 2>> ${repRID}.align.err + elif [ "${ends}" == "pe" ] then echo "LOG: running Hisat2 with paired-end settings" >> ${repRID}.align.err - hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>> ${repRID}.align.out 2>> ${repRID}.align.err + hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>> ${repRID}.align.out 2>> ${repRID}.align.err fi #Convert the output sam file to a sorted bam file using Samtools @@ -368,7 +656,7 @@ process dedupData { publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.dedup.{out,err}" input: - set path (inBam), path (inBai) from rawBam_dedupData + set path (bam), path (bai) from rawBam_dedupData output: tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam @@ -383,12 +671,14 @@ process dedupData { # remove duplicated reads using Picard's MarkDuplicates echo "LOG: running picard MarkDuplicates to remove duplicate reads" >> ${repRID}.dedup.err - java -jar /picard/build/libs/picard.jar MarkDuplicates I=${inBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>> ${repRID}.dedup.out 2>> ${repRID}.dedup.err + java -jar /picard/build/libs/picard.jar MarkDuplicates 
     stranding=0;
-    if [ "${stranded_featureCounts}" == "--rna-strandness F" ] || [ "${stranded_featureCounts}" == "--rna-strandness FR" ]
+    if [ "${stranded}" == "unstranded" ]
     then
-      stranding=1
-      echo "LOG: strandedness set to stranded [1]" >> ${repRID}.makeFeatureCounts.err
-    else
       stranding=0
-      echo "LOG: strandedness set to unstranded [0]" >> ${repRID}.makeFeatureCounts.err
-    fi;
-    #Run featureCounts
-    echo "LOG: running featureCounts on the data" >> ${repRID}.makeFeatureCounts.err
-    if [ "${endsManual_featureCounts }" == "se" ]
+      echo "LOG: strandedness set to unstranded [0]" >> ${repRID}.countData.err
+    elif [ "${stranded}" == "forward" ]
     then
-      featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err
-    elif [ "${endsManual_featureCounts }" == "pe" ]
+      stranding=1
+      echo "LOG: strandedness set to forward stranded [1]" >> ${repRID}.countData.err
+    elif [ "${stranded}" == "reverse" ]
     then
-      featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup -B ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err
+      stranding=2
+      echo "LOG: strandedness set to reverse stranded [2]" >> ${repRID}.countData.err
+    fi
+
+    #Run countData
+    echo "LOG: running countData on the data" >> ${repRID}.countData.err
+    if [ "${ends}" == "se" ]
+    then
+      featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.countData -g 'gene_name' --primary --ignoreDup ${repRID}.sorted.deduped.bam 1>> ${repRID}.countData.out 2>> ${repRID}.countData.err
+    elif [ "${ends}" == "pe" ]
+    then
+      featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.countData -g 'gene_name' --primary --ignoreDup -B ${repRID}.sorted.deduped.bam 1>> ${repRID}.countData.out 2>> ${repRID}.countData.err
     fi
 
-    #Calculate TMP from the resulting featureCounts table
-    echo "LOG: calculating TMP with R" >> ${repRID}.makeFeatureCounts.err
-    Rscript calculateTPM.R --count "${repRID}.featureCounts" 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err
+    #Calculate TPM from the resulting countData table
+    echo "LOG: calculating TPM with R" >> ${repRID}.countData.err
+    Rscript calculateTPM.R --count "${repRID}.countData" 1>> ${repRID}.countData.out 2>> ${repRID}.countData.err
     """
 }
 
@@ -507,77 +802,30 @@
 }
 
 /*
- *inferMetadata: run RSeQC to collect stats and infer experimental metadata
+ *dataQC: run RSeQC to calculate transcript integrity numbers (TIN)
 */
-process inferMetadata {
+process dataQC {
   tag "${repRID}"
-  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.rseqc.{out,err}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.dataQC.{out,err}"
 
  input:
-    path script_inferMeta
-    path reference_inferMeta
-    set path (inBam), path (inBai) from dedupBam_inferMeta
-    set path (inChrBam), path (inChrBai) from dedupChrBam
+    path ref from reference_dataQC
+    set path (chrBam), path (chrBai) from dedupChrBam
 
  output:
-    path "infer.csv" into inferedMetadata
-    path "${inBam.baseName}.tin.xls" into tin
-    path "${repRID}.insertSize.inner_distance_freq.txt" optional true into innerDistance
-    path "${repRID}.rseqc.{out,err}" optional true
+    path "${repRID}.sorted.deduped.tin.xls" into tin
+    path "${repRID}.dataQC.{out,err}" optional true
 
  script:
    """
-    hostname > ${repRID}.rseqc.err
-    ulimit -a >> ${repRID}.rseqc.err
-
-    # infer experimental setting from dedup bam
-    echo "LOG: running inference from bed file" >> ${repRID}.rseqc.err
-    infer_experiment.py -r ./bed/genome.bed -i "${inBam}" > ${repRID}.rseqc.log 2>> ${repRID}.rseqc.err
-
-    echo "LOG: determining endedness and strandedness from file" >> ${repRID}.rseqc.err
-    endness=`bash inferMeta.sh endness ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-    fail=`bash inferMeta.sh fail ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-    if [ \${endness} == "PairEnd" ]
-    then
-      percentF=`bash inferMeta.sh pef ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-      percentR=`bash inferMeta.sh per ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-      inner_distance.py -i "${inBam}" -o ${repRID}.insertSize -r ./bed/genome.bed 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-    elif [ \${endness} == "SingleEnd" ]
-    then
-      percentF=`bash inferMeta.sh sef ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-      percentR=`bash inferMeta.sh ser ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
-    fi
-    if [ 1 -eq \$(echo \$(expr \$percentF ">" 0.25)) ] && [ 1 -eq \$(echo \$(expr \$percentR "<" 0.25)) ]
-    then
-      stranded="forward"
-      if [ \$endness == "PairEnd" ]
-      then
-        strategy="1++,1--,2+-,2-+"
-      else
-        strategy="++,--"
-      fi
-    elif [ 1 -eq \$(echo \$(expr \$percentR ">" 0.25)) ] && [ 1 -eq \$(echo \$(expr \$percentF "<" 0.25)) ]
-    then
-      stranded="reverse"
-      if [ \$endness == "PairEnd" ]
-      then
-        strategy="1+-,1-+,2++,2--"
-      else
-        strategy="+-,-+"
-      fi
-    else
-      stranded="unstranded"
-      strategy="us"
-    fi
-    echo -e "LOG: strategy set to \${strategy}\nStranded set to ${stranded}" >> ${repRID}.rseqc.err
+    hostname > ${repRID}.dataQC.err
+    ulimit -a >> ${repRID}.dataQC.err
 
     # calculate TIN values per feature on each chromosome
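+    # tin.py runs once per chromosome in parallel; the filtered per-chromosome rows are appended after the header line created below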
"${endsManual_featureCounts }" == "se" ] + echo "LOG: strandedness set to unstranded [0]" >> ${repRID}.countData.err + elif [ "${stranded}" == "forward" ] then - featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err - elif [ "${endsManual_featureCounts }" == "pe" ] + stranding=1 + echo "LOG: strandedness set to forward stranded [1]" >> ${repRID}.countData.err + elif [ "${stranded}" == "reverse" ] then - featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup -B ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err + stranding=2 + echo "LOG: strandedness set to forward stranded [2]" >> ${repRID}.countData.err + fi + #Run countData + echo "LOG: running countData on the data" >> ${repRID}.countData.err + if [ "${ends}" == "se" ] + then + featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.countData -g 'gene_name' --primary --ignoreDup ${repRID}.sorted.deduped.bam 1>> ${repRID}.countData.out 2>> ${repRID}.countData.err + elif [ "${ends}" == "pe" ] + then + featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.countData -g 'gene_name' --primary --ignoreDup -B ${repRID}.sorted.deduped.bam 1>> ${repRID}.countData.out 2>> ${repRID}.countData.err fi - #Calculate TMP from the resulting featureCounts table - echo "LOG: calculating TMP with R" >> ${repRID}.makeFeatureCounts.err - Rscript calculateTPM.R --count "${repRID}.featureCounts" 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err + #Calculate TPM from the resulting countData table + echo "LOG: calculating TPM with R" >> ${repRID}.countData.err + Rscript calculateTPM.R --count "${repRID}.countData" 1>> ${repRID}.countData.out 2>> ${repRID}.countData.err """ } @@ -507,77 +802,30 @@ process fastqc { } /* - *inferMetadata: run RSeQC to collect stats and infer experimental metadata + *dataQC: run RSeQC to calculate transcript integrity numbers (TIN) */ -process inferMetadata { +process dataQC { tag "${repRID}" - publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.rseqc.{out,err}" + publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.dataQC.{out,err}" input: - path script_inferMeta - path reference_inferMeta - set path (inBam), path (inBai) from dedupBam_inferMeta - set path (inChrBam), path (inChrBai) from dedupChrBam + path ref from reference_dataQC + set path (chrBam), path (chrBai) from dedupChrBam output: - path "infer.csv" into inferedMetadata - path "${inBam.baseName}.tin.xls" into tin - path "${repRID}.insertSize.inner_distance_freq.txt" optional true into innerDistance - path "${repRID}.rseqc.{out,err}" optional true + path "${repRID}.sorted.deduped.tin.xls" into tin + path "${repRID}.dataQC.{out,err}" optional true script: """ - hostname > ${repRID}.rseqc.err - ulimit -a >> ${repRID}.rseqc.err - - # infer experimental setting from dedup bam - echo "LOG: running inference from bed file" >> ${repRID}.rseqc.err - infer_experiment.py -r ./bed/genome.bed -i "${inBam}" > ${repRID}.rseqc.log 2>> ${repRID}.rseqc.err - - echo "LOG: determining endedness and strandedness from file" >> ${repRID}.rseqc.err - endness=`bash inferMeta.sh endness ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> 
diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py
index eaa2656f9266a5c3de07fb0fc80ad5aeb2ea8422..78b0e2f4e9fb51f9fc827321f303ccc0d3bdcad1 100644
--- a/workflow/scripts/parseMeta.py
+++ b/workflow/scripts/parseMeta.py
@@ -10,7 +10,6 @@ def get_args():
     parser.add_argument('-r', '--repRID',help="The replicate RID.",required=True)
     parser.add_argument('-m', '--metaFile',help="The metadata file to extract.",required=True)
     parser.add_argument('-p', '--parameter',help="The parameter to extract.",required=True)
-    parser.add_argument('-e', '--endsManual',help="The endness.",required=False)
     args = parser.parse_args()
     return args
 
@@ -56,12 +55,9 @@ def main():
     # Get strandedness metadata from 'Experiment Settings.csv'
     if (args.parameter == "stranded"):
         if (metaFile.Has_Strand_Specific_Information.unique() == "yes"):
-            if (args.endsManual=="se"):
-                stranded = "--rna-strandness F"
-            elif (args.endsManual=="pe"):
-                stranded = "--rna-strandness FR"
+            stranded = "stranded"
         elif (metaFile.Has_Strand_Specific_Information.unique() == "no"):
-            stranded = ""
+            stranded = "unstranded"
         else:
             print("Stranded metadata not match expected options: " + metaFile.Has_Strand_Specific_Information.unique())
             exit(1)
diff --git
 a/workflow/tests/test_dataQC.py b/workflow/tests/test_dataQC.py
new file mode 100644
index 0000000000000000000000000000000000000000..95eb332736bf7af93945402e6f79f04daac83f9f
--- /dev/null
+++ b/workflow/tests/test_dataQC.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+from io import StringIO
+import os
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+    '/../../'
+
+@pytest.mark.dataQC
+def test_dataQC():
+    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.sorted.deduped.tin.xls'))
+
diff --git a/workflow/tests/test_downsampleData.py b/workflow/tests/test_downsampleData.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd42c49e169e387dd9662903b20866c40aec8907
--- /dev/null
+++ b/workflow/tests/test_downsampleData.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+from io import StringIO
+import os
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+    '/../../'
+
+@pytest.mark.downsampleData
+def test_downsampleData():
+    assert os.path.exists(os.path.join(test_output_path, 'sampled.1.fq'))
\ No newline at end of file
diff --git a/workflow/tests/test_inferMetadata.py b/workflow/tests/test_inferMetadata.py
index 44ffbc3f239630b357b5fde7746787fe9ca65d62..f2908bee242281c23e814f8bf8e3e3d68e395d9f 100644
--- a/workflow/tests/test_inferMetadata.py
+++ b/workflow/tests/test_inferMetadata.py
@@ -10,5 +10,5 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 
 @pytest.mark.inferMetadata
 def test_inferMetadata():
-    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.sorted.deduped.tin.xls'))
+    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.inferMetadata.log'))