7a76914b · e4c84105 · e4c84105 · e4c84105 · e4c84105 · e4c84105
--- a/workflow/scripts/plotProfile.sh
+++ b/workflow/scripts/plotProfile.sh
-#!/bin/bash
-#plotProfile.sh
-
-bws=$(ls *.bw)
-gtf=$(ls *.gtf *.bed)
-
-computeMatrix reference-point \
-	--referencePoint TSS \
-	-S $bws \
-	-R $gtf \
-	--skipZeros \
-	-o computeMatrix.gz
-	-p max/2
-
-plotProfile -m computeMatrix.gz \
-	-out plotProfile.png \
--- a/workflow/scripts/plot_profile.sh
+++ b/workflow/scripts/plot_profile.sh
+#!/bin/bash
+#plot_profile.sh
+
+script_name="plot_profile.sh"
+
+#Help function
+usage() {
+  echo "-h  --Help documentation for $script_name"
+  echo "-g  --File path to gtf/bed files"
+  echo "Example: $script_name -g 'genome.gtf'"
+  exit 1
+}
+
+
+raise()
+{
+  echo "${1}" >&2
+}
+
+check_tools() {
+  raise "
+   Checking for required libraries and components on this system
+   "
+   deeptools --version &> version_deeptools.txt
+   if [ $? -gt 0 ]
+     then
+      raise "Missing deeptools"
+      return 1
+    fi
+}
+
+compute_matrix() {
+  raise "
+  Computing matrix on ${1} using ${2}
+  "
+
+  computeMatrix reference-point \
+    --referencePoint TSS \
+    -S ${1} \
+    -R ${2} \
+    --skipZeros \
+    -o computeMatrix.gz \
+    -p max/2
+
+  if [ $? -gt 0 ]
+  then
+    raise "Problem building matrix"
+    return 1
+  fi
+}
+
+plot_profile() {
+  raise "
+  Plotting profile
+  "
+
+  plotProfile -m computeMatrix.gz \
+    -out plotProfile.png
+
+  if [ $? -gt 0 ]
+  then
+    raise "Problem plotting"
+    return 1
+  fi
+}
+
+
+run_main() {
+
+  # Parsing options
+  OPTIND=1 # Reset OPTIND
+  while getopts :g:h opt
+      do
+          case $opt in
+              g) gtf=$OPTARG;;
+              h) usage;;
+          esac
+      done
+
+  shift $(($OPTIND -1))
+
+  # Check for mandatory options
+  if [[ -z $gtf ]]; then
+      usage
+  fi
+
+  bws=$(ls *pooled.fc_signal.bw)
+
+  check_tools || exit 1
+
+  compute_matrix "${bws}" "${gtf}" || return 1
+
+  plot_profile || return 1
+
+  raise "ALL COMPLETE"
+}
+
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]
+then
+  run_main "$@"
+  if [ $? -gt 0 ]
+  then
+    exit 1
+  fi
+fi
--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
        pool_control = pool_control_tmp

+
    # Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
    experiment_id = design_df.at[0, 'experiment_id']
    replicate_files = design_df.tag_align.unique()
@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
    else:
        pool_experiment_se = pool_experiment

-        # Check controls against cutoff_ratio
-        # if so replace with pool_control
-        # unless single control was used
+    # Check controls against cutoff_ratio
+    # if so replace with pool_control
+    # unless single control was used
    if not single_control:
        path_to_pool_control = cwd + '/' + pool_control
        if control_df.values.max() > cutoff_ratio:
@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
                                                            path_to_control

    else:
-        path_to_pool_control = cwd + '/' +  pool_control
+        if paired:
+            path_to_pool_control = cwd + '/' + pool_control
+        else:
+            path_to_pool_control = pool_control
        design_new_df['control_tag_align'] = path_to_pool_control

    # Add in pseudo replicates
@@ -306,7 +310,7 @@ def main():
    design_df = pd.read_csv(design, sep='\t')

    # Get current directory to build paths
-    cwd = os.getcwd() 
+    cwd = os.getcwd()

    # Check Number of replicates and replicates
    no_reps = check_replicates(design_df)

--- a/workflow/tests/plot_profile.bats
+++ b/workflow/tests/plot_profile.bats
+#!/opt/bats/libexec/bats-core/ bats
+
+profile_script="./workflow/scripts/plot_profile.sh"
+
+@test "Test deeptools present" {
+  source ${profile_script}
+  run check_tools
+  # `run` only captures the exit code in $status; without this assertion
+  # the test passes even when check_tools fails.
+  [ "$status" -eq 0 ]
+}
+
+@test "Test deeptools computeMatrix" {
+  source ${profile_script}
+  run compute_matrix test_data/ENCSR238SGC_pooled.fc_signal.bw /project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf
+  # `run` only captures the exit code in $status; assert on it explicitly.
+  [ "$status" -eq 0 ]
+  # Fail the test unless the matrix file exists and is not empty.
+  FILE=computeMatrix.gz
+  [ -s "$FILE" ]
+}
+
+@test "Test deeptools plotProfile" {
+  source ${profile_script}
+  run plot_profile computeMatrix.gz
+  # `run` only captures the exit code in $status; assert on it explicitly.
+  [ "$status" -eq 0 ]
+  # Fail the test unless the plot exists and is not empty.
+  FILE=plotProfile.png
+  [ -s "$FILE" ]
+}
--- a/workflow/tests/test_annotate_peaks.py
+++ b/workflow/tests/test_annotate_peaks.py
@@ -25,6 +25,10 @@ def test_annotation_singleend():
    annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.tsv'
    assert os.path.exists(annotation_file)
    assert utils.count_lines(annotation_file) >= 149284
+    df = pd.read_csv(annotation_file, sep = "\t", header = 0)
+    print(df.head())
+    #assert df['symbol'].notna().all()
+    assert not(df['symbol'].isnull().values.any())


 @pytest.mark.pairedend
@@ -42,3 +46,7 @@ def test_annotation_pairedend():
    annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv'
    assert os.path.exists(annotation_file)
    assert utils.count_lines(annotation_file) >= 25367
+    df = pd.read_csv(annotation_file, sep = "\t", header = 0)
+    print(df.head())
+    #assert df['symbol'].notna().all()
+    assert not(df['symbol'].isnull().values.any()) 
--- a/workflow/tests/test_call_peaks_macs.py
+++ b/workflow/tests/test_call_peaks_macs.py
@@ -3,10 +3,29 @@
 import pytest
 import os
 import utils
+from io import StringIO
+import call_peaks_macs

 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
                '/../output/callPeaksMACS/'

+test_data_path = os.path.dirname(os.path.abspath(__file__)) + \
+		'/../../test_data/'
+        
+
+@pytest.mark.unit
+def test_fragment_length():
+    experiment = "experiment.tagAlign.gz"
+    control = "control.tagAlign.gz"
+    prefix = 'test'
+    genome_size = 'hs'
+    chrom_sizes = 'genomefile.txt'
+    cross_qc = os.path.join(test_data_path, 'test_cross.qc')
+    with pytest.raises(Exception) as excinfo:
+        call_peaks_macs.call_peaks_macs(experiment, cross_qc, control, prefix, genome_size, chrom_sizes)
+    assert str(excinfo.value) == "Error in cross-correlation analysis: ['0', '20', '33']"
+
+

 @pytest.mark.singleend
 def test_fc_signal_singleend():

--- a/workflow/tests/test_diff_peaks.py
+++ b/workflow/tests/test_diff_peaks.py
@@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep():
        assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed'))
        diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv'
    assert os.path.exists(diffbind_file)
-    assert utils.count_lines(diffbind_file) >= 65182
+    assert utils.count_lines(diffbind_file) >= 65124
--- a/workflow/tests/test_generate_software_versions.py
+++ b/workflow/tests/test_generate_software_versions.py
@@ -21,3 +21,4 @@ def test_software_versions_output():
        data_loaded = yaml.load(stream)

    assert  len(data_loaded['data'].split('<dt>')) == 18
+    assert  'Not Run' not in data_loaded['data'].split('<dt>')[17] 
--- a/workflow/tests/test_overlap_peaks.py
+++ b/workflow/tests/test_overlap_peaks.py
@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend():
    assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
    peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
    assert utils.count_lines(peak_file) >= 25657
+
+@pytest.mark.singlecontrol
+def test_overlap_peaks_singlecontrol():
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak'))
+    peak_file = test_output_path + 'ENCSR000DXB.replicated.narrowPeak'
+    assert utils.count_lines(peak_file) >= 35097
--- a/workflow/tests/test_plot_profile.py
+++ b/workflow/tests/test_plot_profile.py
@@ -11,8 +11,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_plot_singleend():
    assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
-
+    assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0

 @pytest.mark.pairedend
 def test_plot_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'computeMatrix.gz'))
+    assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
+    assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment):

 @pytest.fixture
 def design_experiment_3(design_experiment):
-    # Update second control to be same as first
-    design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz'
-    return design_experiment
+    # Drop Replicate A_2
+    design_df = design_experiment.drop(design_experiment.index[1])
+    # Update to be paired as first
+    design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
+    design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
+    return design_df


 @pytest.mark.unit
@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2):
    shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
    single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
    assert single_rep.shape[0] == 4
+    assert len(single_rep['control_tag_align'].unique()) == 2
+    assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
+
+
+@pytest.mark.unit
+def test_single_control(design_experiment_3):
+    cwd = os.getcwd()
+    shutil.copy(test_design_path + 'A_1.bedpe.gz', cwd)
+    shutil.copy(test_design_path + 'B_1.bedpe.gz', cwd)
+    shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
+    single_control = pool_and_psuedoreplicate.generate_design('true', 1.2, design_experiment_3, cwd, 1, 1)
+    assert 'pool_control.tagAlign.gz' in single_control['control_tag_align'].unique()[0]
+

 @pytest.mark.singleend
 def test_pool_and_psuedoreplicate_singleend():
No results found