From 0158a2a031f0308480de4ac847054a56837be7ee Mon Sep 17 00:00:00 2001
From: Jeremy Mathews <Jeremy.Mathews@utsouthwestern.edu>
Date: Mon, 15 Jul 2019 09:21:16 -0500
Subject: [PATCH] test pool and psuedoreplicate for single experiment

---
 workflow/scripts/pool_and_psuedoreplicate.py  | 107 ++++++++++--------
 .../tests/test_pool_and_psuedoreplicate.py    |   8 ++
 2 files changed, 65 insertions(+), 50 deletions(-)

diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py
index 07eac44..93d1df4 100644
--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -172,58 +172,10 @@ def self_psuedoreplication(tag_file, prefix, paired):
     return pseudoreplicate_dict
 
 
-def main():
-    args = get_args()
-    paired = args.paired
-    design = args.design
-    cutoff_ratio = args.cutoff
-
-    # Create a file handler
-    handler = logging.FileHandler('experiment_generation.log')
-    logger.addHandler(handler)
-
-    # Read files as dataframes
-    design_df = pd.read_csv(design, sep='\t')
-
+def generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio):
     # Get current directory to build paths
     cwd = os.getcwd()
 
-    # Check Number of replicates and replicates
-    no_reps = check_replicates(design_df)
-    no_unique_controls = check_controls(design_df)
-
-    if no_reps == 1:
-        logger.info("No other replicate specified "
-                    "so processing as an unreplicated experiment.")
-        replicated = False
-
-    else:
-        logger.info("Multiple replicates specified "
-                    "so processing as a replicated experiment.")
-        replicated = True
-
-    if no_unique_controls == 1 and replicated:
-        logger.info("Only a single control was specified "
-                    "so using same control for replicates, pool and psuedoreplicates.")
-        single_control = True
-    else:
-        logger.info("Will merge only unique controls for pooled.")
-        single_control = False
-
-    # Pool the controls for checking
-    if not single_control:
-        control_df = get_read_count_ratio(design_df)
-        control_files = design_df.control_tag_align.unique()
-        pool_control = pool(control_files, "pool_control", paired)
-    else:
-        pool_control = design_df.control_tag_align.unique()[0]
-
-    # if paired_end make tagAlign
-    if paired:
-        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
-        pool_control = pool_control_tmp
-
-    # Psuedoreplicate and update design accordingly
     if not replicated:
 
         # Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
@@ -282,7 +234,6 @@ def main():
             replicate_prefix = experiment_id + '_' + str(rep)
             pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
             pseudoreplicates_dict[rep] = pr_dict
-
         # Merge self psuedoreplication for each true replicate
         pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
         pool_pseudoreplicates_dict = {}
@@ -356,5 +307,61 @@ def main():
                          header=True, sep='\t', index=False)
 
 
+def main():
+    """Prepare the pooled control, then call generate_design().
+
+    Replication and control settings are worked out from the design file.
+    """
+    args = get_args()
+    paired = args.paired
+    design = args.design
+    cutoff_ratio = args.cutoff
+
+    # Create a file handler
+    handler = logging.FileHandler('experiment_generation.log')
+    logger.addHandler(handler)
+
+    # Read files as dataframes
+    design_df = pd.read_csv(design, sep='\t')
+
+    # Check number of replicates and number of unique controls
+    no_reps = check_replicates(design_df)
+    no_unique_controls = check_controls(design_df)
+
+    if no_reps == 1:
+        logger.info("No other replicate specified "
+                    "so processing as an unreplicated experiment.")
+        replicated = False
+    else:
+        logger.info("Multiple replicates specified "
+                    "so processing as a replicated experiment.")
+        replicated = True
+
+    if no_unique_controls == 1 and replicated:
+        logger.info("Only a single control was specified "
+                    "so using same control for replicates, pool and psuedoreplicates.")
+        single_control = True
+    else:
+        logger.info("Will merge only unique controls for pooled.")
+        single_control = False
+
+    # Pool the controls for checking
+    if not single_control:
+        # NOTE(review): control_df is not used again in main() nor passed to
+        # generate_design() -- confirm whether this call is still needed.
+        control_df = get_read_count_ratio(design_df)
+        control_files = design_df.control_tag_align.unique()
+        pool_control = pool(control_files, "pool_control", paired)
+    else:
+        pool_control = design_df.control_tag_align.unique()[0]
+
+    # if paired_end make tagAlign
+    if paired:
+        pool_control = bedpe_to_tagalign(pool_control, "pool_control")
+
+    # Psuedoreplicate and update design accordingly
+    generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio)
+
+
 if __name__ == '__main__':
     main()
diff --git a/workflow/tests/test_pool_and_psuedoreplicate.py b/workflow/tests/test_pool_and_psuedoreplicate.py
index 31fffc5..58ceda9 100644
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -60,6 +60,14 @@ def test_check_controls_single(design_experiment_3):
     assert no_controls == 1
 
 
+@pytest.mark.unit
+def test_generate_design(design_experiment_2):
+    """Smoke test: call generate_design() with replicated=False, paired=True."""
+    pool_and_psuedoreplicate.get_read_count_ratio(design_experiment_2)
+    pool_control = pool_and_psuedoreplicate.pool(design_experiment_2.control_tag_align.unique(), "pool_control", True)
+    pool_and_psuedoreplicate.generate_design(design_experiment_2, False, False, pool_control, True, 1.2)
+
+
 @pytest.mark.singleend
 def test_pool_and_psuedoreplicate_singleend():
     design_file = os.path.join(test_output_path, 'ENCSR238SGC_ppr.tsv')
-- 
GitLab