Skip to content
Snippets Groups Projects
Commit 0158a2a0 authored by Jeremy Mathews's avatar Jeremy Mathews
Browse files

test pool and psuedoreplicate for single experiment

parent a9bc7536
Branches
Tags
No related merge requests found
......@@ -172,58 +172,10 @@ def self_psuedoreplication(tag_file, prefix, paired):
return pseudoreplicate_dict
def main():
args = get_args()
paired = args.paired
design = args.design
cutoff_ratio = args.cutoff
# Create a file handler
handler = logging.FileHandler('experiment_generation.log')
logger.addHandler(handler)
# Read files as dataframes
design_df = pd.read_csv(design, sep='\t')
def generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio):
# Get current directory to build paths
cwd = os.getcwd()
# Check Number of replicates and replicates
no_reps = check_replicates(design_df)
no_unique_controls = check_controls(design_df)
if no_reps == 1:
logger.info("No other replicate specified "
"so processing as an unreplicated experiment.")
replicated = False
else:
logger.info("Multiple replicates specified "
"so processing as a replicated experiment.")
replicated = True
if no_unique_controls == 1 and replicated:
logger.info("Only a single control was specified "
"so using same control for replicates, pool and psuedoreplicates.")
single_control = True
else:
logger.info("Will merge only unique controls for pooled.")
single_control = False
# Pool the controls for checking
if not single_control:
control_df = get_read_count_ratio(design_df)
control_files = design_df.control_tag_align.unique()
pool_control = pool(control_files, "pool_control", paired)
else:
pool_control = design_df.control_tag_align.unique()[0]
# if paired_end make tagAlign
if paired:
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp
# Psuedoreplicate and update design accordingly
if not replicated:
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
......@@ -282,7 +234,6 @@ def main():
replicate_prefix = experiment_id + '_' + str(rep)
pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
pseudoreplicates_dict[rep] = pr_dict
# Merge self psuedoreplication for each true replicate
pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
......@@ -356,5 +307,61 @@ def main():
header=True, sep='\t', index=False)
def main():
    """Classify the experiment from the design file and build the new design.

    Reads the tab-separated design file given on the command line, decides
    whether the experiment is replicated and whether a single control is
    shared, prepares the pooled control, and passes everything to
    generate_design().
    """
    args = get_args()
    paired = args.paired
    design = args.design
    cutoff_ratio = args.cutoff

    # Create a file handler
    handler = logging.FileHandler('experiment_generation.log')
    logger.addHandler(handler)

    # Read files as dataframes
    design_df = pd.read_csv(design, sep='\t')

    # Check number of replicates and number of unique controls
    no_reps = check_replicates(design_df)
    no_unique_controls = check_controls(design_df)

    if no_reps == 1:
        logger.info("No other replicate specified "
                    "so processing as an unreplicated experiment.")
        replicated = False
    else:
        logger.info("Multiple replicates specified "
                    "so processing as a replicated experiment.")
        replicated = True

    if no_unique_controls == 1 and replicated:
        logger.info("Only a single control was specified "
                    "so using same control for replicates, pool and psuedoreplicates.")
        single_control = True
    else:
        logger.info("Will merge only unique controls for pooled.")
        single_control = False

    # Pool the controls for checking.  Each step runs exactly once; the
    # pooling call in particular must not be repeated.
    if not single_control:
        # NOTE(review): control_df is not read again in this function;
        # the call is kept in case get_read_count_ratio is relied on elsewhere.
        control_df = get_read_count_ratio(design_df)
        control_files = design_df.control_tag_align.unique()
        pool_control = pool(control_files, "pool_control", paired)
    else:
        pool_control = design_df.control_tag_align.unique()[0]

    # if paired_end make tagAlign
    # NOTE(review): applied to whichever pool_control was chosen above --
    # confirm this nesting against the original file's indentation.
    if paired:
        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
        pool_control = pool_control_tmp

    # Psuedoreplicate and update design accordingly
    generate_design(design_df, replicated, single_control, pool_control, paired, cutoff_ratio)


if __name__ == '__main__':
    main()
......@@ -60,6 +60,14 @@ def test_check_controls_single(design_experiment_3):
assert no_controls == 1
@pytest.mark.unit
def test_generate_design(design_experiment_2):
    """Run generate_design on the design_experiment_2 fixture.

    Pools the fixture's unique control tagAlign files and calls
    generate_design with replicated=False, single_control=False,
    paired=True and a cutoff ratio of 1.2.  There are no assertions yet:
    the test only checks that the calls complete without raising.
    """
    control_df = pool_and_psuedoreplicate.get_read_count_ratio(design_experiment_2)
    control_files = design_experiment_2.control_tag_align.unique()
    pool_control = pool_and_psuedoreplicate.pool(control_files, "pool_control", True)
    # Positional order: design_df, replicated, single_control, pool_control,
    # paired, cutoff_ratio.
    new_design = pool_and_psuedoreplicate.generate_design(
        design_experiment_2, False, False, pool_control, True, 1.2)
@pytest.mark.singleend
def test_pool_and_psuedoreplicate_singleend():
design_file = os.path.join(test_output_path, 'ENCSR238SGC_ppr.tsv')
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment