diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py index 07eac44c67414a043a3a33bd1c1ccc4a86eb9a87..93d1df40985fb1e6755a648dc487d5e78ae9df5d 100644 --- a/workflow/scripts/pool_and_psuedoreplicate.py +++ b/workflow/scripts/pool_and_psuedoreplicate.py @@ -172,58 +172,10 @@ def self_psuedoreplication(tag_file, prefix, paired): return pseudoreplicate_dict -def main(): - args = get_args() - paired = args.paired - design = args.design - cutoff_ratio = args.cutoff - - # Create a file handler - handler = logging.FileHandler('experiment_generation.log') - logger.addHandler(handler) - - # Read files as dataframes - design_df = pd.read_csv(design, sep='\t') - +def generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio): # Get current directory to build paths cwd = os.getcwd() - # Check Number of replicates and replicates - no_reps = check_replicates(design_df) - no_unique_controls = check_controls(design_df) - - if no_reps == 1: - logger.info("No other replicate specified " - "so processing as an unreplicated experiment.") - replicated = False - - else: - logger.info("Multiple replicates specified " - "so processing as a replicated experiment.") - replicated = True - - if no_unique_controls == 1 and replicated: - logger.info("Only a single control was specified " - "so using same control for replicates, pool and psuedoreplicates.") - single_control = True - else: - logger.info("Will merge only unique controls for pooled.") - single_control = False - - # Pool the controls for checking - if not single_control: - control_df = get_read_count_ratio(design_df) - control_files = design_df.control_tag_align.unique() - pool_control = pool(control_files, "pool_control", paired) - else: - pool_control = design_df.control_tag_align.unique()[0] - - # if paired_end make tagAlign - if paired: - pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control") - pool_control = pool_control_tmp - - # 
Psuedoreplicate and update design accordingly if not replicated: # Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data @@ -282,7 +234,6 @@ def main(): replicate_prefix = experiment_id + '_' + str(rep) pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired) pseudoreplicates_dict[rep] = pr_dict - # Merge self psuedoreplication for each true replicate pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict) pool_pseudoreplicates_dict = {} @@ -356,5 +307,57 @@ def main(): header=True, sep='\t', index=False) +def main(): + args = get_args() + paired = args.paired + design = args.design + cutoff_ratio = args.cutoff + + # Create a file handler + handler = logging.FileHandler('experiment_generation.log') + logger.addHandler(handler) + + # Read files as dataframes + design_df = pd.read_csv(design, sep='\t') + + # Check Number of replicates and replicates + no_reps = check_replicates(design_df) + no_unique_controls = check_controls(design_df) + + if no_reps == 1: + logger.info("No other replicate specified " + "so processing as an unreplicated experiment.") + replicated = False + + else: + logger.info("Multiple replicates specified " + "so processing as a replicated experiment.") + replicated = True + + if no_unique_controls == 1 and replicated: + logger.info("Only a single control was specified " + "so using same control for replicates, pool and psuedoreplicates.") + single_control = True + else: + logger.info("Will merge only unique controls for pooled.") + single_control = False + + # Pool the controls for checking + if not single_control: + control_df = get_read_count_ratio(design_df) + control_files = design_df.control_tag_align.unique() + pool_control = pool(control_files, "pool_control", paired) + else: + pool_control = 
design_df.control_tag_align.unique()[0] + + # if paired_end make tagAlign + if paired: + pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control") + pool_control = pool_control_tmp + + # Psuedoreplicate and update design accordingly + generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio) + + if __name__ == '__main__': main() diff --git a/workflow/tests/test_pool_and_psuedoreplicate.py b/workflow/tests/test_pool_and_psuedoreplicate.py index 31fffc57e7eaa598a933c97c92b1c901ba731ba7..58ceda9df518d61c0dda9d3a2680fa69c652678b 100644 --- a/workflow/tests/test_pool_and_psuedoreplicate.py +++ b/workflow/tests/test_pool_and_psuedoreplicate.py @@ -60,6 +60,14 @@ def test_check_controls_single(design_experiment_3): assert no_controls == 1 +@pytest.mark.unit +def test_generate_design(design_experiment_2): + control_df = pool_and_psuedoreplicate.get_read_count_ratio(design_experiment_2) + control_files = design_experiment_2.control_tag_align.unique() + pool_control = pool_and_psuedoreplicate.pool(control_files, "pool_control", True) + new_design = pool_and_psuedoreplicate.generate_design(design_experiment_2, False, False, pool_control, True, 1.2) + + @pytest.mark.singleend def test_pool_and_psuedoreplicate_singleend(): design_file = os.path.join(test_output_path, 'ENCSR238SGC_ppr.tsv')