e4c84105 · e4c84105 · e4c84105 · e4c84105 · f42d1727 · e4c84105
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -4,11 +4,28 @@ profiles {
  }
 }

+trace {
+  enabled = true
+  file = 'pipeline_trace.txt'
+  fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
+}
+
+timeline {
+  enabled = true
+  file = 'timeline.html'
+}
+
+report {
+  enabled = true
+  file = 'report.html'
+}
+
+
 manifest {
  name = 'chipseq_analysis'
  description = 'BICF ChIP-seq Analysis Workflow.'
  homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
-  version = '1.0.6'
+  version = '1.1.2'
  mainScript = 'main.nf'
  nextflowVersion = '>=0.31.0'
 }
--- a/workflow/scripts/annotate_peaks.R
+++ b/workflow/scripts/annotate_peaks.R
@@ -6,40 +6,27 @@
 #* --------------------------------------------------------------------------
 #*

+#Currently Human or Mouse
+
 # Load libraries
 library("ChIPseeker")
-
-# Currently mouse or human
-
-library("TxDb.Hsapiens.UCSC.hg19.knownGene")
-library("TxDb.Mmusculus.UCSC.mm10.knownGene")
-library("TxDb.Hsapiens.UCSC.hg38.knownGene")
-library("org.Hs.eg.db")
-library("org.Mm.eg.db")
-
+library(GenomicFeatures)

 # Create parser object
 args <- commandArgs(trailingOnly=TRUE)

 # Check input args
-if (length(args) != 2) {
-  stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly", call.=FALSE)
+if (length(args) != 3) {
+  stop("Usage: annotate_peaks.R annotate_design.tsv gtf geneNames", call.=FALSE)
 }

 design_file <- args[1]
-genome_assembly <- args[2]
+gtf <- args[2]
+geneNames <- args[3]

 # Load UCSC Known Genes
-if(genome_assembly=='GRCh37') {
-    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
-    annodb <- 'org.Hs.eg.db'
-} else if(genome_assembly=='GRCm38')  {
-    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
-    annodb <- 'org.Mm.eg.db'
-} else if(genome_assembly=='GRCh38')  {
-    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
-    annodb <- 'org.Hs.eg.db'
-}
+txdb <- makeTxDbFromGFF(gtf)
+sym <- read.table(geneNames, header=T, sep='\t') [,4:5]

 # Output version of ChIPseeker
 chipseeker_version = packageVersion('ChIPseeker')
@@ -54,18 +41,19 @@ names(files) <- design$Condition
 # Granges of files

 peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
-peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE)
+peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)

-column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
+column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
                  "pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
-                  "geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS",
-                  "ENSEMBL", "symbol", "geneName")
+                  "geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")

 for(index in c(1:length(peakAnnoList))) {
  filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
  df <- as.data.frame(peakAnnoList[[index]])
-  colnames(df) <- column_names
-  write.table(df[ , !(names(df) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
+  df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
+  df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
+  colnames(df_final) <- column_names
+  write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)

  # Draw individual plots


--- a/workflow/scripts/call_peaks_macs.py
+++ b/workflow/scripts/call_peaks_macs.py
@@ -138,8 +138,20 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes)
    with open(xcor, 'r') as xcor_fh:
        firstline = xcor_fh.readline()
        frag_lengths = firstline.split()[2]  # third column
-        fragment_length = frag_lengths.split(',')[0]  # grab first value
-        logger.info("Fraglen %s", fragment_length)
+        frag_lengths_array = frag_lengths.split(',')
+        fragment_length = 0
+        fragment = False
+        # Loop through all values of fragment length
+        for f in frag_lengths.split(','):
+            fragment_length = f
+            logger.info("Fraglen %s", fragment_length)
+            if int(fragment_length) > 50:
+                fragment = True
+                break
+
+        if fragment == False:
+            logger.info('Error in cross-correlation analysis: %s', frag_lengths_array)
+            raise Exception("Error in cross-correlation analysis: %s" % frag_lengths_array)

    # Generate narrow peaks and preliminary signal tracks


--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -46,7 +46,7 @@ SOFTWARE_REGEX = {
    'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"],
    'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""],
    'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"],
-    'Python': ['version_python.txt', r"python, version (\S+)"],
+    'Python': ['version_python.txt', r"Python (\S+)"],
    'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
 }


--- a/workflow/scripts/plotProfile.sh
+++ b/workflow/scripts/plotProfile.sh
-#!/bin/bash
-#plotProfile.sh
-
-bws=`ls *.bw`
-gtf=`ls *.gtf *.bed`
-
-computeMatrix reference-point \
-	--referencePoint TSS \
-	-S $bws \
-	-R $gtf \
-	--skipZeros \
-	-o computeMatrix.gz
-
-plotProfile -m computeMatrix.gz \
-	-out plotProfile.png \
--- a/workflow/scripts/plot_profile.sh
+++ b/workflow/scripts/plot_profile.sh
+#!/bin/bash
+#plot_profile.sh
+
+script_name="plot_profile.sh"
+
+#Help function
+usage() {
+  echo "-h  --Help documentation for $script_name"
+  echo "-g  --File path to gtf/bed files"
+  echo "Example: $script_name -g 'genome.gtf'"
+  exit 1
+}
+
+
+raise()
+{
+  echo "${1}" >&2
+}
+
+check_tools() {
+  raise "
+   Checking for required libraries and components on this system
+   "
+   deeptools --version &> version_deeptools.txt
+   if [ $? -gt 0 ]
+     then
+      raise "Missing deeptools"
+      return 1
+    fi
+}
+
+compute_matrix() {
+  raise "
+  Computing matrix on ${1} using ${2}
+  "
+
+  computeMatrix reference-point \
+    --referencePoint TSS \
+    -S ${1} \
+    -R ${2} \
+    --skipZeros \
+    -o computeMatrix.gz \
+    -p max/2
+
+  if [ $? -gt 0 ]
+  then
+    raise "Problem building matrix"
+    return 1
+  fi
+}
+
+plot_profile() {
+  raise "
+  Plotting profile
+  "
+
+  plotProfile -m computeMatrix.gz \
+    -out plotProfile.png
+
+  if [ $? -gt 0 ]
+  then
+    raise "Problem plotting"
+    return 1
+  fi
+}
+
+
+run_main() {
+
+  # Parsing options
+  OPTIND=1 # Reset OPTIND
+  while getopts :g:h opt
+      do
+          case $opt in
+              g) gtf=$OPTARG;;
+              h) usage;;
+          esac
+      done
+
+  shift $(($OPTIND -1))
+
+  # Check for mandatory options
+  if [[ -z $gtf ]]; then
+      usage
+  fi
+
+  bws=$(ls *pooled.fc_signal.bw)
+
+  check_tools || exit 1
+
+  compute_matrix "${bws}" "${gtf}" || return 1
+
+  plot_profile || return 1
+
+  raise "ALL COMPLETE"
+}
+
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]
+then
+  run_main "$@"
+  if [ $? -gt 0 ]
+  then
+    exit 1
+  fi
+fi
--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -172,26 +172,7 @@ def self_psuedoreplication(tag_file, prefix, paired):
    return pseudoreplicate_dict


-def main():
-    args = get_args()
-    paired = args.paired
-    design = args.design
-    cutoff_ratio = args.cutoff
-
-    # Create a file handler
-    handler = logging.FileHandler('experiment_generation.log')
-    logger.addHandler(handler)
-
-    # Read files as dataframes
-    design_df = pd.read_csv(design, sep='\t')
-
-    # Get current directory to build paths
-    cwd = os.getcwd() 
-
-    # Check Number of replicates and replicates
-    no_reps = check_replicates(design_df)
-    no_unique_controls = check_controls(design_df)
-
+def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_controls):
    if no_reps == 1:
        logger.info("No other replicate specified "
                    "so processing as an unreplicated experiment.")
@@ -223,83 +204,43 @@ def main():
        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
        pool_control = pool_control_tmp

-    if not replicated:
-
-        # Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
-        experiment_id = design_df.at[0, 'experiment_id']
-        replicate = design_df.at[0, 'replicate']
-        design_new_df = design_df.loc[np.repeat(design_df.index, 4)].reset_index()

-        # Update tagAlign with single end data
-        if paired:
-            design_new_df['tag_align'] = design_new_df['se_tag_align']
-        design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
-
-        design_new_df['replicate'] = design_new_df['replicate'].astype(str)
-        design_new_df.at[1, 'sample_id'] = experiment_id + '_pr1'
-        design_new_df.at[1, 'replicate'] = '1_pr'
-        design_new_df.at[1, 'xcor'] = 'Calculate'
-        design_new_df.at[2, 'sample_id'] = experiment_id + '_pr2'
-        design_new_df.at[2, 'replicate'] = '2_pr'
-        design_new_df.at[2, 'xcor'] = 'Calculate'
-        design_new_df.at[3, 'sample_id'] = experiment_id + '_pooled'
-        design_new_df.at[3, 'replicate'] = 'pooled'
-        design_new_df.at[3, 'xcor'] = 'Calculate'
-        design_new_df.at[3, 'tag_align'] = design_new_df.at[0, 'tag_align']
-
-        # Make 2 self psuedoreplicates
-        self_pseudoreplicates_dict = {}
-        for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
-            replicate_prefix = experiment_id + '_' + str(rep)
-            self_pseudoreplicates_dict = \
-                self_psuedoreplication(tag_file, replicate_prefix, paired)
-
-        # Update design to include new self pseudo replicates
-        for rep, pseudorep_file in self_pseudoreplicates_dict.items():
-            path_to_file = cwd + '/' + pseudorep_file
-            replicate = rep + 1
-            design_new_df.loc[replicate, 'tag_align'] = path_to_file
-
-        # Drop index column
-        design_new_df.drop(labels='index', axis=1, inplace=True)
+    # Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
+    experiment_id = design_df.at[0, 'experiment_id']
+    replicate_files = design_df.tag_align.unique()
+    pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
+
+    # Make 2 self psuedoreplicates
+    pseudoreplicates_dict = {}
+    for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
+        replicate_prefix = experiment_id + '_' + str(rep)
+        pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
+        pseudoreplicates_dict[rep] = pr_dict
+
+    # Update design to include new self pseudo replicates
+    pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
+    pool_pseudoreplicates_dict = {}
+    for index, row in pseudoreplicates_df.iterrows():
+        replicate_id = index + 1
+        pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
+        pool_replicate = pool(row, pr_filename, False)
+        pool_pseudoreplicates_dict[replicate_id] = pool_replicate
+
+    design_new_df = design_df #.loc[np.repeat(design_df.index, 4)].reset_index()
+    # Update tagAlign with single end data
+    if paired:
+        design_new_df['tag_align'] = design_new_df['se_tag_align']
+    design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)

+    # If paired change to single End
+    if paired:
+        pool_experiment_se = bedpe_to_tagalign(pool_experiment, experiment_id + "_pooled")
    else:
-        # Make pool of replicates
-        replicate_files = design_df.tag_align.unique()
-        experiment_id = design_df.at[0, 'experiment_id']
-        pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
-
-        # If paired change to single End
-        if paired:
-            pool_experiment_se = bedpe_to_tagalign(pool_experiment, experiment_id + "_pooled")
-        else:
-            pool_experiment_se = pool_experiment
-
-        # Make self psuedoreplicates equivalent to number of replicates
-        pseudoreplicates_dict = {}
-        for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
-            replicate_prefix = experiment_id + '_' + str(rep)
-            pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
-            pseudoreplicates_dict[rep] = pr_dict
-        # Merge self psuedoreplication for each true replicate
-        pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
-        pool_pseudoreplicates_dict = {}
-        for index, row in pseudoreplicates_df.iterrows():
-            replicate_id = index + 1
-            pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
-            pool_replicate = pool(row, pr_filename, False)
-            pool_pseudoreplicates_dict[replicate_id] = pool_replicate
-
-        design_new_df = design_df
-        # Update tagAlign with single end data
-        if paired:
-            design_new_df['tag_align'] = design_new_df['se_tag_align']
-        design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
-        # Check controls against cutoff_ratio
-        # if so replace with pool_control
-        # unless single control was used
-
+        pool_experiment_se = pool_experiment

+    # Check controls against cutoff_ratio
+    # if so replace with pool_control
+    # unless single control was used
    if not single_control:
        path_to_pool_control = cwd + '/' + pool_control
        if control_df.values.max() > cutoff_ratio:
@@ -327,7 +268,10 @@ def main():
                                                            path_to_control

    else:
-        path_to_pool_control = cwd + '/' +  pool_control
+        if paired:
+            path_to_pool_control = cwd + '/' + pool_control
+        else:
+            path_to_pool_control = pool_control
        design_new_df['control_tag_align'] = path_to_pool_control

    # Add in pseudo replicates
@@ -349,7 +293,34 @@ def main():
    tmp_metadata['tag_align'] = path_to_file
    design_new_df = design_new_df.append(tmp_metadata)

+    return design_new_df
+
+
+def main():
+    args = get_args()
+    paired = args.paired
+    design = args.design
+    cutoff_ratio = args.cutoff
+
+    # Create a file handler
+    handler = logging.FileHandler('experiment_generation.log')
+    logger.addHandler(handler)
+
+    # Read files as dataframes
+    design_df = pd.read_csv(design, sep='\t')
+
+    # Get current directory to build paths
+    cwd = os.getcwd()
+
+    # Check Number of replicates and replicates
+    no_reps = check_replicates(design_df)
+    no_unique_controls = check_controls(design_df)
+
+    # Generate new design file
+    design_new_df = generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_controls)
+
    # Write out new dataframe
+    experiment_id = design_df.at[0, 'experiment_id']
    design_new_df.to_csv(experiment_id + '_ppr.tsv',
                         header=True, sep='\t', index=False)


--- a/workflow/scripts/xcor.py
+++ b/workflow/scripts/xcor.py
@@ -103,14 +103,20 @@ def xcor(tag, paired):
    uncompressed_tag_filename = tag_basename

    # Subsample tagAlign file
-    number_reads = 15000000
+    number_reads = 20000000
    subsampled_tag_filename = \
        tag_basename + ".%d.tagAlign.gz" % (number_reads/1000000)

+    tag_extended = 'cat.tagAlign.gz'
+    out, err = utils.run_pipe([
+        "zcat %s %s %s" %
+        (tag, tag, tag)
+    ], outfile=tag_extended)
+
    steps = [
        'zcat %s' % (tag),
        'grep -v "chrM"',
-        'shuf -n %d --random-source=%s' % (number_reads, tag)]
+        'shuf -n %d --random-source=%s' % (number_reads, tag_extended)]

    if paired:
        steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])

--- a/workflow/tests/plot_profile.bats
+++ b/workflow/tests/plot_profile.bats
+#!/opt/bats/libexec/bats-core/ bats
+
+profile_script="./workflow/scripts/plot_profile.sh"
+
+@test "Test deeptools present" {
+  source ${profile_script}
+  run check_tools
+}
+
+@test "Test deeptools computeMatrix" {
+  source ${profile_script}
+  run compute_matrix test_data/ENCSR238SGC_pooled.fc_signal.bw /project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf
+  FILE=computeMatrix.gz
+  if [[ -s "$FILE" ]]; then
+      echo "$FILE exists and not empty"
+  fi
+}
+
+@test "Test deeptools plotProfile" {
+  source ${profile_script}
+  run plot_profile computeMatrix.gz
+  FILE=plotProfile.png
+  if [[ -s "$FILE" ]]; then
+      echo "$FILE exists and not empty"
+  fi
+}
--- a/workflow/tests/test_annotate_peaks.py
+++ b/workflow/tests/test_annotate_peaks.py
@@ -25,6 +25,10 @@ def test_annotation_singleend():
    annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.tsv'
    assert os.path.exists(annotation_file)
    assert utils.count_lines(annotation_file) >= 149284
+    df = pd.read_csv(annotation_file, sep = "\t", header = 0)
+    print(df.head())
+    #assert df['symbol'].notna().all()
+    assert not(df['symbol'].isnull().values.any())


 @pytest.mark.pairedend
@@ -41,4 +45,8 @@ def test_upsetplot_pairedend():
 def test_annotation_pairedend():
    annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv'
    assert os.path.exists(annotation_file)
-    assert utils.count_lines(annotation_file) >= 25494
+    assert utils.count_lines(annotation_file) >= 25367
+    df = pd.read_csv(annotation_file, sep = "\t", header = 0)
+    print(df.head())
+    #assert df['symbol'].notna().all()
+    assert not(df['symbol'].isnull().values.any()) 
--- a/workflow/tests/test_call_peaks_macs.py
+++ b/workflow/tests/test_call_peaks_macs.py
@@ -3,10 +3,29 @@
 import pytest
 import os
 import utils
+from io import StringIO
+import call_peaks_macs

 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
                '/../output/callPeaksMACS/'

+test_data_path = os.path.dirname(os.path.abspath(__file__)) + \
+		'/../../test_data/'
+        
+
+@pytest.mark.unit
+def test_fragment_length():
+    experiment = "experiment.tagAlign.gz"
+    control = "control.tagAlign.gz"
+    prefix = 'test'
+    genome_size = 'hs'
+    chrom_sizes = 'genomefile.txt'
+    cross_qc = os.path.join(test_data_path, 'test_cross.qc')
+    with pytest.raises(Exception) as excinfo:
+        call_peaks_macs.call_peaks_macs(experiment, cross_qc, control, prefix, genome_size, chrom_sizes)
+    assert str(excinfo.value) == "Error in cross-correlation analysis: ['0', '20', '33']"
+
+

 @pytest.mark.singleend
 def test_fc_signal_singleend():
@@ -26,7 +45,7 @@ def test_peaks_xls_singleend():
 @pytest.mark.singleend
 def test_peaks_bed_singleend():
    peak_file = test_output_path +  'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak'
-    assert utils.count_lines(peak_file) == 227389
+    assert utils.count_lines(peak_file) == 226738


 @pytest.mark.pairedend
@@ -47,4 +66,4 @@ def test_peaks_xls_pairedend():
 @pytest.mark.pairedend
 def test_peaks_bed_pairedend():
    peak_file = test_output_path + 'ENCSR729LGA/2/' + 'ENCLB568IYX.narrowPeak'
-    assert utils.count_lines(peak_file) == 113821
+    assert utils.count_lines(peak_file) == 112631
--- a/workflow/tests/test_diff_peaks.py
+++ b/workflow/tests/test_diff_peaks.py
@@ -15,7 +15,7 @@ def test_diff_peaks_singleend_single_rep():
    assert os.path.isdir(test_output_path) == False


-@pytest.mark.pairedend
+@pytest.mark.pairedendskip_true
 def test_diff_peaks_pairedend_single_rep():
    assert os.path.isdir(test_output_path) == False

@@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep():
        assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed'))
        diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv'
    assert os.path.exists(diffbind_file)
-    assert utils.count_lines(diffbind_file) >= 66201
+    assert utils.count_lines(diffbind_file) >= 65124
--- a/workflow/tests/test_generate_software_references.py
+++ b/workflow/tests/test_generate_software_references.py
@@ -4,6 +4,7 @@ import pytest
 import os
 import utils
 import yaml
+from bs4 import BeautifulSoup

 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
                '/../output/multiqcReport/'
@@ -21,3 +22,14 @@ def test_software_references_output():
        data_loaded = yaml.load(stream)

    assert len(data_loaded['data'].split('<ul>')) == 19
+
+
+@pytest.mark.singleend
+def test_software_references_html():
+    multiqc_report = os.path.join(test_output_path, 'multiqc_report.html')
+    html_file = open(multiqc_report, 'r')
+    source_code = html_file.read()
+    multiqc_html = BeautifulSoup(source_code, 'html.parser')
+    references = multiqc_html.find(id="mqc-module-section-Software_References")
+    assert references is not None
+    assert len(references.find_all('ul')) == 18
--- a/workflow/tests/test_generate_software_versions.py
+++ b/workflow/tests/test_generate_software_versions.py
@@ -20,4 +20,5 @@ def test_software_versions_output():
    with open(software_versions, 'r') as stream:
        data_loaded = yaml.load(stream)

-    assert  len(data_loaded['data'].split('<dt>')) == 17
+    assert  len(data_loaded['data'].split('<dt>')) == 18
+    assert  'Not Run' not in data_loaded['data'].split('<dt>')[17] 
--- a/workflow/tests/test_overlap_peaks.py
+++ b/workflow/tests/test_overlap_peaks.py
@@ -44,4 +44,10 @@ def test_overlap_peaks_singleend():
 def test_overlap_peaks_pairedend():
    assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
    peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
-    assert utils.count_lines(peak_file) >= 25758
+    assert utils.count_lines(peak_file) >= 25657
+
+@pytest.mark.singlecontrol
+def test_overlap_peaks_singlecontrol():
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak'))
+    peak_file = test_output_path + 'ENCSR000DXB.replicated.narrowPeak'
+    assert utils.count_lines(peak_file) >= 35097
--- a/workflow/tests/test_plot_profile.py
+++ b/workflow/tests/test_plot_profile.py
+#!/usr/bin/env python3
+
+import pytest
+import os
+import utils
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+                '/../output/experimentQC/'
+
+
+@pytest.mark.singleend
+def test_plot_singleend():
+    assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
+    assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
+
+@pytest.mark.pairedend
+def test_plot_pairedend():
+    assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
+    assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -5,16 +5,18 @@ import pandas as pd
 from io import StringIO
 import os
 import pool_and_psuedoreplicate
+import shutil

+test_design_path = os.path.dirname(os.path.abspath(__file__)) + \
+		'/../../test_data/'
 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
                '/../output/design/'

-DESIGN_STRING = """sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate\tcontrol_tag_align
-A_1\tA_1.bedse.gz\tA_1.cc.qc\tLiver\tH3K27ac\tNone\t1\tB_1.bedse.gz
-A_2\tA_2.bedse.gz\tA_2.cc.qc\tLiver\tH3K27ac\tNone\t2\tB_2.bedse.gz
+DESIGN_STRING = """sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\tcontrol_tag_align
+A_1\tA_1.tagAlign.gz\tA_1.bedse.gz\tA_1.cc.qc\tA\tLiver\tH3K27ac\tNone\t1\tB_1\tB_1.bedse.gz
+A_2\tA_2.tagAlign.gz\tA_2.bedse.gz\tA_2.cc.qc\tA\tLiver\tH3K27ac\tNone\t2\tB_2\tB_2.bedse.gz
 """

-
 @pytest.fixture
 def design_experiment():
    design_file = StringIO(DESIGN_STRING)
@@ -31,9 +33,12 @@ def design_experiment_2(design_experiment):

 @pytest.fixture
 def design_experiment_3(design_experiment):
-    # Update second control to be same as first
-    design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz'
-    return design_experiment
+    # Drop Replicate A_2
+    design_df = design_experiment.drop(design_experiment.index[1])
+    # Update to be paired as first
+    design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
+    design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
+    return design_df


 @pytest.mark.unit
@@ -62,7 +67,25 @@ def test_check_controls_single(design_experiment_3):

 @pytest.mark.unit
 def test_single_rep(design_experiment_2):
-    single_rep = pool_and_psuedoreplicate.main(design_experiment_2, "true", 1.2)
+    cwd = os.getcwd()
+    shutil.copy(test_design_path + 'A_1.bedse.gz', cwd)
+    shutil.copy(test_design_path + 'B_1.bedse.gz', cwd)
+    shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
+    shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
+    single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
+    assert single_rep.shape[0] == 4
+    assert len(single_rep['control_tag_align'].unique()) == 2
+    assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
+
+
+@pytest.mark.unit
+def test_single_control(design_experiment_3):
+    cwd = os.getcwd()
+    shutil.copy(test_design_path + 'A_1.bedpe.gz', cwd)
+    shutil.copy(test_design_path + 'B_1.bedpe.gz', cwd)
+    shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
+    single_control = pool_and_psuedoreplicate.generate_design('true', 1.2, design_experiment_3, cwd, 1, 1)
+    assert 'pool_control.tagAlign.gz' in single_control['control_tag_align'].unique()[0]


 @pytest.mark.singleend

--- a/workflow/tests/test_xcor.py
+++ b/workflow/tests/test_xcor.py
@@ -17,9 +17,9 @@ def test_cross_plot_singleend():
 def test_cross_qc_singleend():
    qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc")
    df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
-    assert df_xcor[2].iloc[0] == '190,200,210'
-    assert df_xcor[8].iloc[0] == 1.025906
-    assert round(df_xcor[9].iloc[0], 6) == 1.139671
+    assert df_xcor[2].iloc[0] == '185,195,205'
+    assert df_xcor[8].iloc[0] == 1.02454
+    assert df_xcor[9].iloc[0] == 0.8098014


 @pytest.mark.pairedend
@@ -31,6 +31,6 @@ def test_cross_qc_pairedend():
 def test_cross_plot_pairedend():
    qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc")
    df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
-    assert df_xcor[2].iloc[0] == '220,430,475'
-    assert round(df_xcor[8].iloc[0],6) == 1.060018
-    assert df_xcor[9].iloc[0] == 4.099357
+    assert df_xcor[2].iloc[0] == '215,225,455'
+    assert round(df_xcor[8].iloc[0],6) == 1.056201
+    assert df_xcor[9].iloc[0] == 3.599357
No results found