diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5c9d184ccff41358ca43353aa909d8fda6e788fd..37083c154c27c879833b85c8f8c256c2aecb46dc 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -61,6 +61,12 @@ alignData:
   - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.sorted.bai
   - pytest -m alignData
 
+dedupData:
+  stage: unit
+  script:
+  - singularity exec 'docker://bicf/picard2.21.7:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
+  - pytest -m dedupData
+
 integration_se:
   stage: integration
   script:
diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config
index 52043ab4da33c04a110605866acb3bc5eba750c2..ca23b6c49cd80b98c3120010609d46e7131757f8 100755
--- a/workflow/conf/aws_ondemand.config
+++ b/workflow/conf/aws_ondemand.config
@@ -13,13 +13,22 @@ process {
   cpus = 1
   memory = '1 GB'
 
+  withName:parseMetadata {
+    cpus = 5
+  }
   withName:getRef {
     cpus = 8
   }
   withName:trimData {
-    cpus = 15
+    cpus = 8
+    memory = '2 GB'
   }
   withName:alignData {
     cpus = 50
+    memory = '10 GB'
+  }
+  withName:dedupData {
+    cpus = 2
+    memory = '20 GB'
   }
 }
\ No newline at end of file
diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config
index 189ab7e02f21440583feb13606964ab67e7a4f78..e0447565495e09c3ccde2968eb887aa14f7db251 100755
--- a/workflow/conf/aws_spot.config
+++ b/workflow/conf/aws_spot.config
@@ -13,13 +13,22 @@ process {
   cpus = 1
   memory = '1 GB'
 
+  withName:parseMetadata {
+    cpus = 5
+  }
   withName:getRef {
     cpus = 8
   }
   withName:trimData {
-    cpus = 15
+    cpus = 8
+    memory = '2 GB'
   }
   withName:alignData {
     cpus = 50
+    memory = '10 GB'
+  }
+  withName:dedupData {
+    cpus = 2
+    memory = '20 GB'
   }
 }
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 7d58dcc685fab609ac4c8ec71ec449871bc6abe5..a17f91e1a3397c2c28fe84d0e70b2342f32cbb0a 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -16,10 +16,13 @@ process {
     executor = 'local'
   }
   withName:trimData {
-    queue = '256GB,256GBv1,384GB'
+    queue = 'super'
   }
   withName:alignData {
-    queue = '256GB,256GBv1,384GB'
+    queue = '256GB,256GBv1'
+  }
+  withName: dedupData {
+    queue = 'super'
   }
 }
 
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index b78a6e4f3f2675f1ba585d91bed02d9b9fb63e37..0b6f27a5c078db72a5ae025a5d0cde2118163047 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -29,6 +29,9 @@ process {
   withName: alignData {
     container = 'bicf/gudmaprbkaligner:2.0.0'
   }
+  withName: dedupData {
+    container = 'bicf/picard2.21.7:2.0.0'
+  }
 }
 
 trace {
@@ -58,4 +61,4 @@ manifest {
   mainScript = 'rna-seq.nf'
   version = 'v0.0.1_indev'
   nextflowVersion = '>=19.09.0'
-}
+}
\ No newline at end of file
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 21c77a09555cabf75011ceab149d3bed31f88e34..740db3bc6973c950339127ba8d81934766ad085f 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -209,6 +209,7 @@ process getRef {
     ulimit -a >>${repRID}.getRef.err
     export https_proxy=\${http_proxy}
 
+    # retreive appropriate reference from S3 bucket
     if [ "${species_getRef}" == "Mus musculus" ]
     then
       references=\$(echo ${referenceBase}/mouse/${refVersion}/GRCm${refMuVersion})
@@ -275,14 +276,17 @@ process alignData {
     path reference
 
   output:
-    path ("${repRID}.unal.gz")
-    path ("${repRID}.sorted.bam")
-    path ("${repRID}.sorted.bai")
+    path ("${repRID}.sorted.bam") into rawBam
+    path ("${repRID}.sorted.bai") into rawBai
     path ("${repRID}.align.out")
     path ("${repRID}.align.err")
 
   script:
     """
+    hostname >${repRID}.align.err
+    ulimit -a >>${repRID}.align.err
+
+    # align reads
     if [ "${endsManual_alignData}" == "se" ]
     then
       hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} -U ${fastq[0]} 1>${repRID}.align.out 2>${repRID}.align.err
@@ -290,8 +294,36 @@ process alignData {
     then
       hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} 1>${repRID}.align.out 2>${repRID}.align.err
     fi
-    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err
-    samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err
-    samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bai 1>>${repRID}.align.out 2>>${repRID}.align.err
+    
+    # convert sam to bam and sort and index
+    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err;
+    samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err;
+    samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bai 1>>${repRID}.align.out 2>>${repRID}.align.err;
+    """
+}
+
+/*
+ *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates
+*/
+process dedupData {
+  tag "${repRID}"
+  publishDir "${outDir}/bam", mode: 'copy', pattern: "*.deduped.bam"
+  publishDir "${logsDir}", mode: 'copy', pattern: "*.dedup.{out,err}"
+
+  input:
+    path rawBam
+
+  output:
+    path ("${repRID}.deduped.bam") into dedupBam
+    path ("${repRID}.dedup.out")
+    path ("${repRID}.dedup.err")
+
+  script:
+    """
+    hostname >${repRID}.dedup.err
+    ulimit -a >>${repRID}.dedup.err
+
+    #Remove duplicated reads
+    java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
     """
 }
\ No newline at end of file
diff --git a/workflow/tests/test_alignReads.py b/workflow/tests/test_alignReads.py
index 7c20dd3f269e202e824285daa00a85d4301b3278..d95e63a6426e5d1cbec874b0c4e86b17388ba975 100644
--- a/workflow/tests/test_alignReads.py
+++ b/workflow/tests/test_alignReads.py
@@ -20,20 +20,4 @@ def test_alignData_se():
 def test_alignData_pe():
 	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.unal.gz'))
 	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.sorted.bam'))
-	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.sorted.bai'))
-
-
-@pytest.mark.alignLogs
-def test_alignLogs_se():
-	assert os.path.exists(os.path.join(data_output_path, '16-1ZX4.align.err'))
-	assert '34497376 reads; of these:' in open(os.path.join(data_output_path, '16-1ZX4.align.err')).readlines()[0]
-	assert os.path.exists(os.path.join(data_output_path, '16-1ZX4.align.out'))
-
-
-@pytest.mark.alignLogs
-def test_alignLogs_pe():
-	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA.align.err'))
-	assert utils.count_lines(os.path.join(data_output_path, 'Q-Y5JA.align.err')) == 7
-	assert '15824858 reads; of these:' in open(os.path.join(data_output_path, 'Q-Y5JA.align.err')).readlines()[0]
-	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA.align.out'))
-	assert utils.count_lines(os.path.join(data_output_path, 'Q-Y5JA.align.out')) == 0
+	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.sorted.bai'))
\ No newline at end of file
diff --git a/workflow/tests/test_dedupReads.py b/workflow/tests/test_dedupReads.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c0cd7aa67ab0e47168f8bf438191f6a9c217b72
--- /dev/null
+++ b/workflow/tests/test_dedupReads.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+import os
+import utils
+
+data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+	'/../../'
+
+
+@pytest.mark.dedupData
+def test_dedupData():
+	assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.deduped.bam'))
\ No newline at end of file