update tests for pool and pseudoreplicates

e505acc3 · Jeremy Mathews · 06ef989b · e505acc3 · e505acc3
Commit e505acc3 authored 5 years ago by Jeremy Mathews
--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -172,9 +172,56 @@ def self_psuedoreplication(tag_file, prefix, paired):
    return pseudoreplicate_dict


-def generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio):
+def main():
+    args = get_args()
+    paired = args.paired
+    design = args.design
+    cutoff_ratio = args.cutoff
+
+    # Create a file handler
+    handler = logging.FileHandler('experiment_generation.log')
+    logger.addHandler(handler)
+
+    # Read files as dataframes
+    design_df = pd.read_csv(design, sep='\t')
+
    # Get current directory to build paths
-    cwd = os.getcwd()
+    cwd = os.getcwd() 
+
+    # Check number of replicates and controls
+    no_reps = check_replicates(design_df)
+    no_unique_controls = check_controls(design_df)
+
+    if no_reps == 1:
+        logger.info("No other replicate specified "
+                    "so processing as an unreplicated experiment.")
+        replicated = False
+
+    else:
+        logger.info("Multiple replicates specified "
+                    "so processing as a replicated experiment.")
+        replicated = True
+
+    if no_unique_controls == 1 and replicated:
+        logger.info("Only a single control was specified "
+                    "so using same control for replicates, pool and psuedoreplicates.")
+        single_control = True
+    else:
+        logger.info("Will merge only unique controls for pooled.")
+        single_control = False
+
+    # Pool the controls for checking
+    if not single_control:
+        control_df = get_read_count_ratio(design_df)
+        control_files = design_df.control_tag_align.unique()
+        pool_control = pool(control_files, "pool_control", paired)
+    else:
+        pool_control = design_df.control_tag_align.unique()[0]
+
+    # if paired_end make tagAlign
+    if paired:
+        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
+        pool_control = pool_control_tmp

    if not replicated:

@@ -307,57 +354,5 @@ def generate_design(design_df, replicated, single_control, pool_control, paired,
                         header=True, sep='\t', index=False)


-def main():
-    args = get_args()
-    paired = args.paired
-    design = args.design
-    cutoff_ratio = args.cutoff
-
-    # Create a file handler
-    handler = logging.FileHandler('experiment_generation.log')
-    logger.addHandler(handler)
-
-    # Read files as dataframes
-    design_df = pd.read_csv(design, sep='\t')
-
-    # Check Number of replicates and replicates
-    no_reps = check_replicates(design_df)
-    no_unique_controls = check_controls(design_df)
-
-    if no_reps == 1:
-        logger.info("No other replicate specified "
-                    "so processing as an unreplicated experiment.")
-        replicated = False
-
-    else:
-        logger.info("Multiple replicates specified "
-                    "so processing as a replicated experiment.")
-        replicated = True
-
-    if no_unique_controls == 1 and replicated:
-        logger.info("Only a single control was specified "
-                    "so using same control for replicates, pool and psuedoreplicates.")
-        single_control = True
-    else:
-        logger.info("Will merge only unique controls for pooled.")
-        single_control = False
-
-    # Pool the controls for checking
-    if not single_control:
-        control_df = get_read_count_ratio(design_df)
-        control_files = design_df.control_tag_align.unique()
-        pool_control = pool(control_files, "pool_control", paired)
-    else:
-        pool_control = design_df.control_tag_align.unique()[0]
-
-    # if paired_end make tagAlign
-    if paired:
-        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
-        pool_control = pool_control_tmp
-
-    # Psuedoreplicate and update design accordingly
-    generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio)
-
-
 if __name__ == '__main__':
    main()
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -54,6 +54,12 @@ def test_check_controls(design_experiment):
    assert no_controls == 2


+@pytest.mark.unit
+def test_check_controls_no(design_experiment_2):
+    no_controls = pool_and_psuedoreplicate.check_controls(design_experiment_2)
+    assert no_controls == 1
+
+
 @pytest.mark.unit
 def test_check_controls_single(design_experiment_3):
    no_controls = pool_and_psuedoreplicate.check_controls(design_experiment_3)