From e56be52c1436d999323d364220497b427addc42e Mon Sep 17 00:00:00 2001 From: Jeremy Mathews <Jeremy.Mathews@utsouthwestern.edu> Date: Tue, 16 Jul 2019 09:46:27 -0500 Subject: [PATCH] create function generate_design. update test --- workflow/scripts/pool_and_psuedoreplicate.py | 73 ++++++++++++++----- .../tests/test_pool_and_psuedoreplicate.py | 4 +- 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py index 4199656..e092864 100644 --- a/workflow/scripts/pool_and_psuedoreplicate.py +++ b/workflow/scripts/pool_and_psuedoreplicate.py @@ -172,26 +172,7 @@ def self_psuedoreplication(tag_file, prefix, paired): return pseudoreplicate_dict -def main(): - args = get_args() - paired = args.paired - design = args.design - cutoff_ratio = args.cutoff - - # Create a file handler - handler = logging.FileHandler('experiment_generation.log') - logger.addHandler(handler) - - # Read files as dataframes - design_df = pd.read_csv(design, sep='\t') - - # Get current directory to build paths - cwd = os.getcwd() - - # Check Number of replicates and replicates - no_reps = check_replicates(design_df) - no_unique_controls = check_controls(design_df) - +def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_controls): if no_reps == 1: logger.info("No other replicate specified " "so processing as an unreplicated experiment.") @@ -341,6 +322,22 @@ def main(): tmp_metadata['tag_align'] = path_to_file design_new_df = design_new_df.append(tmp_metadata) + # Add in pool experiment + tmp_metadata['sample_id'] = experiment_id + '_pooled' else: + path_to_pool_control = cwd + '/' + pool_control + design_new_df['control_tag_align'] = path_to_pool_control + + # Add in pseudo replicates + tmp_metadata = design_new_df.loc[0].copy() + tmp_metadata['control_tag_align'] = path_to_pool_control + for rep, pseudorep_file in pool_pseudoreplicates_dict.items(): + tmp_metadata['sample_id'] = experiment_id + '_pr' + str(rep) + tmp_metadata['replicate'] = str(rep) + '_pr' + tmp_metadata['xcor'] = 'Calculate' + path_to_file = cwd + '/' + pseudorep_file + tmp_metadata['tag_align'] = path_to_file + design_new_df = design_new_df.append(tmp_metadata) + # Add in pool experiment tmp_metadata['sample_id'] = experiment_id + '_pooled' tmp_metadata['replicate'] = 'pooled' @@ -353,6 +350,42 @@ def main(): design_new_df.to_csv(experiment_id + '_ppr.tsv', header=True, sep='\t', index=False) + tmp_metadata['replicate'] = 'pooled' + tmp_metadata['xcor'] = 'Calculate' + path_to_file = cwd + '/' + pool_experiment_se + tmp_metadata['tag_align'] = path_to_file + design_new_df = design_new_df.append(tmp_metadata) + + return design_new_df + + +def main(): + args = get_args() + paired = args.paired + design = args.design + cutoff_ratio = args.cutoff + + # Create a file handler + handler = logging.FileHandler('experiment_generation.log') + logger.addHandler(handler) + + # Read files as dataframes + design_df = pd.read_csv(design, sep='\t') + + # Get current directory to build paths + cwd = os.getcwd() + + # Check Number of replicates and replicates + no_reps = check_replicates(design_df) + no_unique_controls = check_controls(design_df) + + # Generate new design file + design_new_df = generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_controls) + + # Write out new dataframe + design_new_df.to_csv(experiment_id + '_ppr.tsv', + header=True, sep='\t', index=False) + if __name__ == '__main__': main() diff --git a/workflow/tests/test_pool_and_psuedoreplicate.py b/workflow/tests/test_pool_and_psuedoreplicate.py index eff8fa2..b888605 100644 --- a/workflow/tests/test_pool_and_psuedoreplicate.py +++ b/workflow/tests/test_pool_and_psuedoreplicate.py @@ -63,8 +63,8 @@ def test_check_controls_single(design_experiment_3): @pytest.mark.unit def test_single_rep(design_experiment_2): - sys.argv[1](['--design', 'design_experiment_2']) - single_rep = pool_and_psuedoreplicate.main() + cwd = os.getcwd() + single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1) @pytest.mark.singleend -- GitLab