diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index acb133b5be9f6f5e79c352f6704633fbe716d907..bc3e9e879c8d95e072d2918d9b04e69c69601852 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,5 +6,5 @@ before_script: test: script: - - nextflow run workflow/main.nf + - nextflow run workflow/main.nf - pytest diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index f8b8dcf0bd0e0684ac9042aeeff55e5f2c7e628a..023968245603d637a7d8a9d4de07108ef961dcf0 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -44,6 +44,9 @@ workflow_modules: - 'cutadapt/1.9.1' - 'fastqc/0.11.2' - 'bwa/intel/0.7.12' + - 'samtools/1.4.1' + - 'sambamba/0.6.6' + - 'bedtools/2.26.0' # A list of parameters used by the workflow, defining how to present them, # options etc in the web interface. For each parameter: diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index c92f9f5ecb8b174f5a6db24c177638466451c90d..3268a6efb9431382e9f12887539ee242615e4ee1 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -15,6 +15,10 @@ process { module = ['python/3.6.1-2-anaconda', 'bwa/intel/0.7.12', 'samtools/intel/1.3'] cpus = 32 } + $filterReads{ + module = ['python/3.6.1-2-anaconda', 'samtools/1.4.1', 'sambamba/0.6.6', 'bedtools/2.26.0'] + cpus = 32 + } } params { diff --git a/workflow/main.nf b/workflow/main.nf index 90763c5aa18db0c0417bcc83c6002c11b5b15946..fd0c59d503f1c60e7f751db3058baa4e95f3b6be 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -114,7 +114,7 @@ process alignReads { output: set sampleId, file('*.bam'), biosample, factor, treatment, replicate, controlId into mappedReads - set file('*.srt.bam.flagstat.qc') + file '*.srt.bam.flagstat.qc' into mappedReadsStats script: @@ -130,3 +130,35 @@ process alignReads { } } + +// Dedup reads using sambamba +process filterReads { + + tag "$sampleId-$replicate" + publishDir "$baseDir/output/${task.process}", mode: 'copy' + + input: + + set sampleId, mapped, biosample, factor, treatment, replicate, controlId from mappedReads + + output: + + set sampleId, file('*.bam'), file('*.bai'), biosample, factor, treatment, replicate, controlId into dedupReads + file '*flagstat.qc' into dedupReadsStats + file '*pbc.qc' into dedupReadsComplexity + file '*dup.qc' into dupReads + + script: + + if (pairedEnd) { + """ + python3 $baseDir/scripts/map_qc.py -b $mapped -p + """ + } + else { + """ + python3 $baseDir/scripts/map_qc.py -b $mapped + """ + } + +} diff --git a/workflow/scripts/check_design.py b/workflow/scripts/check_design.py index acf4f014f2c4ed24d25da9bd5e268ace9bd2fadd..52185d6f6adde130a34cc593b8fe4f113e9b3879 100644 --- a/workflow/scripts/check_design.py +++ b/workflow/scripts/check_design.py @@ -26,11 +26,11 @@ def get_args(): formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-d', '--design', - help="The design file to run QC (TSV format).", + help="The design file to run QC (tsv format).", required=True) parser.add_argument('-f', '--fastq', - help="File with list of fastq files (csv format).", + help="File with list of fastq files (tsv format).", required=True) parser.add_argument('-p', '--paired', diff --git a/workflow/scripts/map_qc.py b/workflow/scripts/map_qc.py new file mode 100644 index 0000000000000000000000000000000000000000..f44b6e4cd39895fa09984a13ce066cbf8d5299b8 --- /dev/null +++ b/workflow/scripts/map_qc.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 + +'''Remove duplicates and filter unmapped reads.''' + +import os +import subprocess +import argparse +import shutil +import shlex +import logging +from 
multiprocessing import cpu_count
+import utils
+import pandas as pd
+
+EPILOG = '''
+For more details:
+        %(prog)s --help
+'''
+
+# SETTINGS
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.INFO)
+
+
+# The order of this list is important.
+# strip_extensions strips from the right inward, so
+# the expected right-most extensions should appear first (like .gz)
+# Modified from J. Seth Strattan
+STRIP_EXTENSIONS = ['.bam', '.srt']
+
+
+def get_args():
+    '''Define arguments.'''
+    parser = argparse.ArgumentParser(
+        description=__doc__, epilog=EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('-b', '--bam',
+                        help="The BAM file to run filtering and QC on.",
+                        required=True)
+
+    parser.add_argument('-p', '--paired',
+                        help="Set if the data are paired-end (default: single-end).",
+                        default=False,
+                        action='store_true')
+
+    args = parser.parse_args()
+    return args
+
+# Functions
+
+
+def check_tools():
+    '''Checks for required components on the user's system.'''
+
+    logger.info('Checking for required libraries and components on this system')
+
+    samtools_path = shutil.which("samtools")
+    if samtools_path:
+        logger.info('Found samtools: %s', samtools_path)
+    else:
+        logger.error('Missing samtools')
+        raise Exception('Missing samtools')
+
+    sambamba_path = shutil.which("sambamba")
+    if sambamba_path:
+        logger.info('Found sambamba: %s', sambamba_path)
+    else:
+        logger.error('Missing sambamba')
+        raise Exception('Missing sambamba')
+
+    bedtools_path = shutil.which("bedtools")
+    if bedtools_path:
+        logger.info('Found bedtools: %s', bedtools_path)
+    else:
+        logger.error('Missing bedtools')
+        raise Exception('Missing bedtools')
+
+
+def filter_mapped_pe(bam, bam_basename):
+    '''Use samtools to filter unmapped reads for PE data.'''
+
+    filt_bam_prefix = bam_basename + ".filt.srt"
+    filt_bam_filename = filt_bam_prefix + ".bam"
+    tmp_filt_bam_prefix = "tmp.%s" % (filt_bam_prefix)
+    tmp_filt_bam_filename = tmp_filt_bam_prefix + ".bam"
+
+    # Remove unmapped, mate unmapped,
+    # not primary alignment, reads failing platform QC
+    # Remove low MAPQ reads
+    # Only keep properly paired reads
+    # Obtain name sorted BAM file
+    out, err = utils.run_pipe([
+        # filter: -F 1804 FLAG bits to exclude; -f 2 FLAG bits to require;
+        # -q 30 exclude MAPQ < 30; -u uncompressed output
+        # exclude FLAG 1804: unmapped, next segment unmapped, secondary
+        # alignments, not passing platform QC, PCR or optical duplicates
+        # require FLAG 2: properly aligned
+        "samtools view -F 1804 -f 2 -q 30 -u %s" % (bam),
+        # sort: -n sort by name; - take input from stdin;
+        # out to specified filename
+        # Will produce name sorted BAM
+        "samtools sort -n -@ %d -o %s" % (cpu_count(), tmp_filt_bam_filename)])
+    if err:
+        logger.error("samtools filter error: %s" % (err))
+
+    # Remove orphan reads (pair was removed)
+    # and read pairs mapping to different chromosomes
+    # Obtain position sorted BAM
+    out, err = utils.run_pipe([
+        # fill in mate coordinates, ISIZE and mate-related flags;
+        # fixmate requires name-sorted alignment; -r removes secondary and
+        # unmapped (redundant here because already done above?)
+        # - send output to stdout
+        "samtools fixmate -r %s -" % (tmp_filt_bam_filename),
+        # repeat filtering after mate repair
+        "samtools view -F 1804 -f 2 -u -",
+        # produce the coordinate-sorted BAM
+        "samtools sort -@ %d -o %s" % (cpu_count(), filt_bam_filename)])
+
+    os.remove(tmp_filt_bam_filename)
+    return filt_bam_filename
+
+
+def filter_mapped_se(bam, bam_basename):
+    '''Use samtools to filter unmapped reads for SE data.'''
+
+    filt_bam_prefix = bam_basename + ".filt.srt"
+    filt_bam_filename = filt_bam_prefix + ".bam"
+
+    # Remove unmapped, mate unmapped,
+    # not primary alignment, reads failing platform QC
+    # Remove low MAPQ reads
+    # Input is already position sorted, so the output stays sorted
+    with open(filt_bam_filename, 'w') as fh:
+        samtools_filter_command = (
+            "samtools view -F 1804 -q 30 -b %s"
+            % (bam)
+            )
+        logger.info(samtools_filter_command)
+        subprocess.check_call(
+            shlex.split(samtools_filter_command),
+            stdout=fh)
+
+    return filt_bam_filename
+
+
+def dedup_mapped(bam, bam_basename, paired):
+    '''Use sambamba and samtools to remove duplicates.'''
+
+    # Mark duplicates
+    dup_file_qc_filename = bam_basename + ".dup.qc"
+    tmp_dup_mark_filename = bam_basename + ".dupmark.bam"
+    sambamba_params = "--hash-table-size=17592186044416" + \
+        " --overflow-list-size=20000000 --io-buffer-size=256"
+    with open(dup_file_qc_filename, 'w') as fh:
+        sambamba_markdup_command = (
+            "sambamba markdup -t %d %s %s %s"
+            % (cpu_count(), sambamba_params, bam, tmp_dup_mark_filename)
+            )
+        logger.info(sambamba_markdup_command)
+        subprocess.check_call(
+            shlex.split(sambamba_markdup_command),
+            stderr=fh)
+
+
+    # Remove duplicates
+    final_bam_prefix = bam_basename + ".filt.nodup"
+    final_bam_filename = final_bam_prefix + ".bam"
+
+    if paired:  # paired-end data
+        samtools_dedupe_command = \
+            "samtools view -F 1804 -f 2 -b %s" % (tmp_dup_mark_filename)
+    else:
+        samtools_dedupe_command = \
+            "samtools view -F 1804 -b %s" % (tmp_dup_mark_filename)
+
+    with open(final_bam_filename, 'w') as fh:
+        logger.info(samtools_dedupe_command)
+        subprocess.check_call(
+            shlex.split(samtools_dedupe_command),
+            stdout=fh)
+
+    # Index final BAM file (with samtools, not sambamba)
+    samtools_index_command = \
+        "samtools index -@ %d %s" % (cpu_count(), final_bam_filename)
+    logger.info(samtools_index_command)
+    subprocess.check_output(shlex.split(samtools_index_command))
+
+    # Generate mapping statistics
+    final_bam_file_mapstats_filename = final_bam_prefix + ".flagstat.qc"
+    with open(final_bam_file_mapstats_filename, 'w') as fh:
+        flagstat_command = "sambamba flagstat -t %d %s" \
+            % (cpu_count(), final_bam_filename)
+        logger.info(flagstat_command)
+        subprocess.check_call(shlex.split(flagstat_command), stdout=fh)
+
+    os.remove(bam)
+    return tmp_dup_mark_filename
+
+
+def compute_complexity(bam, paired, bam_basename):
+    '''Calculate library complexity.'''
+
+    pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc"
+    tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename)
+
+    # Sort by name
+    # convert to bedPE and obtain fragment coordinates
+    # sort by position and strand
+    # Obtain unique count statistics
+
+    # PBC file output columns:
+    # TotalReadPairs [tab]
+    # DistinctReadPairs [tab]
+    # OneReadPair [tab]
+    # TwoReadPairs [tab]
+    # NRF=Distinct/Total [tab]
+    # PBC1=OnePair/Distinct [tab]
+    # PBC2=OnePair/TwoPair
+    pbc_headers = ['TotalReadPairs',
+                   'DistinctReadPairs',
+                   'OneReadPair',
+                   'TwoReadPairs',
+                   'NRF',
+                   'PBC1',
+                   'PBC2']
+
+    if paired:
+        steps = [
+            "sambamba sort -t %d -n %s" % (cpu_count(), bam),
+            "bamToBed -bedpe -i stdin",
+            r"""awk 'BEGIN{OFS="\t"}{print
$1,$2,$4,$6,$9,$10}'"""] + else: + steps = [ + "bamToBed -i %s" % (bam), + r"""awk 'BEGIN{OFS="\t"}{print $1,$2,$3,$6}'"""] + steps.extend([ + "grep -v 'chrM'", + "sort", + "uniq -c", + r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'""" + ]) + out, err = utils.run_pipe(steps, tmp_pbc_file_qc_filename) + if err: + logger.error("PBC file error: %s" % (err)) + + # Add headers + pbc_file = pd.read_csv(tmp_pbc_file_qc_filename, sep='\t', header=None, + names=pbc_headers) + pbc_file.to_csv(pbc_file_qc_filename, header=True, sep='\t', index=False) + os.remove(bam) + os.remove(bam + '.bai') + os.remove(tmp_pbc_file_qc_filename) + + +def main(): + args = get_args() + paired = args.paired + bam = args.bam + + # Create a file handler + handler = logging.FileHandler('map_qc.log') + logger.addHandler(handler) + + # Check if tools are present + check_tools() + + # Run filtering for either PE or SE + bam_basename = os.path.basename( + utils.strip_extensions(bam, STRIP_EXTENSIONS)) + if paired: # paired-end data + filter_bam_filename = filter_mapped_pe(bam, bam_basename) + + else: + filter_bam_filename = filter_mapped_se(bam, bam_basename) + + # Remove duplicates + markdup_bam_filename = dedup_mapped(filter_bam_filename, bam_basename, paired) + + # Compute library complexity + compute_complexity(markdup_bam_filename, paired, bam_basename) + + +if __name__ == '__main__': + main() diff --git a/workflow/tests/test_map_qc.py b/workflow/tests/test_map_qc.py new file mode 100644 index 0000000000000000000000000000000000000000..ebc2cd5aae2808986fb59a48250cf53eadb162c4 --- /dev/null +++ b/workflow/tests/test_map_qc.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import pytest +import os +import pandas as pd + +test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../output/filterReads/' + + +def test_map_qc_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam')) + assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam.bai')) + filtered_reads_report = test_output_path + 'ENCFF646LXU.filt.nodup.flagstat.qc' + samtools_report = open(filtered_reads_report).readlines() + assert '64962570 + 0 in total' in samtools_report[0] + assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4] + library_complexity = test_output_path + 'ENCFF646LXU.filt.nodup.pbc.qc' + df_library_complexity = pd.read_csv(library_complexity, sep='\t') + assert df_library_complexity["NRF"].iloc[0] == 0.926192 + assert df_library_complexity["PBC1"].iloc[0] == 0.926775 + assert df_library_complexity["PBC2"].iloc[0] == 13.706885 + + +def test_map_qc_pairedend(): + # Do the same thing for paired end data + pass diff --git a/workflow/tests/test_map_reads.py b/workflow/tests/test_map_reads.py index d5a83667a39e768c0c66698b298a53f1aa681ce2..04ffd08084999dfeb2eebde88055e73e9832951f 100644 --- a/workflow/tests/test_map_reads.py +++ b/workflow/tests/test_map_reads.py @@ -3,10 +3,13 @@ import pytest import os +test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../output/alignReads/' + def test_map_reads_singleend(): - aligned_reads_report = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/alignReads/ENCFF646LXU.srt.bam.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.srt.bam')) + aligned_reads_report = test_output_path + 'ENCFF646LXU.srt.bam.flagstat.qc' samtools_report = 
open(aligned_reads_report).readlines() assert '80795025 + 0 in total' in samtools_report[0] assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4] diff --git a/workflow/tests/test_trim_reads.py b/workflow/tests/test_trim_reads.py index e9680f681251d9d9a9db07ce679e5e5d6a0eba2f..7ca03750cd6f585e078de78d550e3f7fbac824bd 100644 --- a/workflow/tests/test_trim_reads.py +++ b/workflow/tests/test_trim_reads.py @@ -3,14 +3,17 @@ import pytest import os +test_data_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../../test_data/' +test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../output/trimReads/' + def test_trim_reads_singleend(): - raw_fastq = os.path.dirname(os.path.abspath(__file__)) + \ - '/../../test_data/ENCFF833BLU.fastq.gz' - trimmed_fastq = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/trimReads/ENCFF833BLU_trimmed.fq.gz' - trimmed_fastq_report = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/trimReads/ENCFF833BLU.fastq.gz_trimming_report.txt' + raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz' + trimmed_fastq = test_output_path + 'ENCFF833BLU_trimmed.fq.gz' + trimmed_fastq_report = test_output_path + \ + 'ENCFF833BLU.fastq.gz_trimming_report.txt' assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(trimmed_fastq) == 2512853101 assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4]
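
Note on utils.run_pipe(): map_qc.py chains its samtools commands through a run_pipe() helper from workflow/scripts/utils.py, which is not part of this diff. To make the call sites reviewable on their own, here is a minimal sketch of what such a helper could look like, inferred only from the two usages run_pipe(steps) and run_pipe(steps, outfile); the implementation details are assumptions, not the pipeline's actual code.

```python
# Hypothetical sketch of a run_pipe(steps, outfile=None) helper matching
# the call sites in map_qc.py; not the actual utils.py implementation.
import shlex
import subprocess


def run_pipe(steps, outfile=None):
    '''Chain commands, piping each stage's stdout into the next stage's stdin.'''
    processes = []
    prev_stdout = None
    for i, step in enumerate(steps):
        last = (i == len(steps) - 1)
        # The last stage either writes to a file or is captured in memory.
        stdout = open(outfile, 'w') if (last and outfile) else subprocess.PIPE
        proc = subprocess.Popen(shlex.split(step),
                                stdin=prev_stdout,
                                stdout=stdout,
                                # Only the final stage's stderr is collected,
                                # matching the `if err:` checks in map_qc.py.
                                stderr=subprocess.PIPE if last else None)
        if prev_stdout is not None:
            prev_stdout.close()  # let SIGPIPE propagate to upstream stages
        prev_stdout = proc.stdout
        processes.append(proc)
    out, err = processes[-1].communicate()
    if outfile:
        stdout.close()
    return out, err
```

With this shape, the first utils.run_pipe() call in filter_mapped_pe() behaves like `samtools view ... | samtools sort ...`, with `err` coming from the final sort stage.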
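
Note on the complexity metrics: the awk END block in compute_complexity() condenses the library-complexity arithmetic into one line. The same computation in plain Python, for readability only; `counts` is assumed to be the list of per-location read (pair) counts that the `sort | uniq -c` stage emits (chrM already excluded by the grep step):

```python
# Plain-Python restatement of the awk END block in compute_complexity().
def pbc_metrics(counts):
    mt = sum(counts)                       # TotalReadPairs
    m0 = len(counts)                       # DistinctReadPairs
    m1 = sum(1 for c in counts if c == 1)  # OneReadPair
    m2 = sum(1 for c in counts if c == 2)  # TwoReadPairs
    return {'NRF': m0 / mt,    # non-redundant fraction
            'PBC1': m1 / m0,   # PCR bottlenecking coefficient 1
            'PBC2': m1 / m2}   # PCR bottlenecking coefficient 2
```

For reference, the single-end fixture in test_map_qc.py expects NRF = 0.926192, PBC1 = 0.926775 and PBC2 = 13.706885 from this calculation.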
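
Note on the paired-end test stub: test_map_qc_pairedend() is left as a pass-through until paired-end fixtures exist. A sketch of the shape it will presumably take, mirroring test_map_qc_singleend(); the accession 'SAMPLE' and the loosened assertions are placeholders to be replaced with real fixture values:

```python
# Hypothetical paired-end counterpart of test_map_qc_singleend();
# 'SAMPLE' and all asserted values are placeholders.
import os
import pandas as pd

test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
    '/../output/filterReads/'


def test_map_qc_pairedend():
    bam = os.path.join(test_output_path, 'SAMPLE.filt.nodup.bam')
    assert os.path.exists(bam)
    assert os.path.exists(bam + '.bai')
    filtered_reads_report = test_output_path + 'SAMPLE.filt.nodup.flagstat.qc'
    samtools_report = open(filtered_reads_report).readlines()
    assert 'in total' in samtools_report[0]  # pin the exact count here
    library_complexity = test_output_path + 'SAMPLE.filt.nodup.pbc.qc'
    df_library_complexity = pd.read_csv(library_complexity, sep='\t')
    # Pin exact NRF/PBC1/PBC2 values once paired-end fixtures are generated
    assert 0.0 <= df_library_complexity["NRF"].iloc[0] <= 1.0
```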