diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py index 67f20f1827fc6287eb6638a648f86c92958d1d6a..41996567ee08f181270181b7821e874a0c92d2ca 100644 --- a/workflow/scripts/pool_and_psuedoreplicate.py +++ b/workflow/scripts/pool_and_psuedoreplicate.py @@ -172,9 +172,56 @@ def self_psuedoreplication(tag_file, prefix, paired): return pseudoreplicate_dict -def generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio): +def main(): + args = get_args() + paired = args.paired + design = args.design + cutoff_ratio = args.cutoff + + # Create a file handler + handler = logging.FileHandler('experiment_generation.log') + logger.addHandler(handler) + + # Read files as dataframes + design_df = pd.read_csv(design, sep='\t') + # Get current directory to build paths - cwd = os.getcwd() + cwd = os.getcwd() + + # Check Number of replicates and replicates + no_reps = check_replicates(design_df) + no_unique_controls = check_controls(design_df) + + if no_reps == 1: + logger.info("No other replicate specified " + "so processing as an unreplicated experiment.") + replicated = False + + else: + logger.info("Multiple replicates specified " + "so processing as a replicated experiment.") + replicated = True + + if no_unique_controls == 1 and replicated: + logger.info("Only a single control was specified " + "so using same control for replicates, pool and psuedoreplicates.") + single_control = True + else: + logger.info("Will merge only unique controls for pooled.") + single_control = False + + # Pool the controls for checking + if not single_control: + control_df = get_read_count_ratio(design_df) + control_files = design_df.control_tag_align.unique() + pool_control = pool(control_files, "pool_control", paired) + else: + pool_control = design_df.control_tag_align.unique()[0] + + # if paired_end make tagAlign + if paired: + pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control") + pool_control = pool_control_tmp if not replicated: @@ -307,57 +354,5 @@ def generate_design(design_df, replicated, single_control, pool_control, paired, header=True, sep='\t', index=False) -def main(): - args = get_args() - paired = args.paired - design = args.design - cutoff_ratio = args.cutoff - - # Create a file handler - handler = logging.FileHandler('experiment_generation.log') - logger.addHandler(handler) - - # Read files as dataframes - design_df = pd.read_csv(design, sep='\t') - - # Check Number of replicates and replicates - no_reps = check_replicates(design_df) - no_unique_controls = check_controls(design_df) - - if no_reps == 1: - logger.info("No other replicate specified " - "so processing as an unreplicated experiment.") - replicated = False - - else: - logger.info("Multiple replicates specified " - "so processing as a replicated experiment.") - replicated = True - - if no_unique_controls == 1 and replicated: - logger.info("Only a single control was specified " - "so using same control for replicates, pool and psuedoreplicates.") - single_control = True - else: - logger.info("Will merge only unique controls for pooled.") - single_control = False - - # Pool the controls for checking - if not single_control: - control_df = get_read_count_ratio(design_df) - control_files = design_df.control_tag_align.unique() - pool_control = pool(control_files, "pool_control", paired) - else: - pool_control = design_df.control_tag_align.unique()[0] - - # if paired_end make tagAlign - if paired: - pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control") - pool_control = pool_control_tmp - - # Psuedoreplicate and update design accordingly - generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio) - - if __name__ == '__main__': main() diff --git a/workflow/tests/test_pool_and_psuedoreplicate.py b/workflow/tests/test_pool_and_psuedoreplicate.py index 31fffc57e7eaa598a933c97c92b1c901ba731ba7..26554723a5ab61abba1685a7de3f4655019dbbe3 100644 --- a/workflow/tests/test_pool_and_psuedoreplicate.py +++ b/workflow/tests/test_pool_and_psuedoreplicate.py @@ -54,6 +54,12 @@ def test_check_controls(design_experiment): assert no_controls == 2 +@pytest.mark.unit +def test_check_controls_no(design_experiment_2): + no_controls = pool_and_psuedoreplicate.check_controls(design_experiment_2) + assert no_controls == 1 + + @pytest.mark.unit def test_check_controls_single(design_experiment_3): no_controls = pool_and_psuedoreplicate.check_controls(design_experiment_3)