Skip to content
Snippets Groups Projects
Commit a9bfa737 authored by Jeremy Mathews's avatar Jeremy Mathews
Browse files

Final Revision: Remove If-Else Statement (Redundant)

parent 596e145d
Branches
Tags
1 merge request!54Resolve "Single control or experiment fails at call peaks macs"
Pipeline #4537 passed with stages
in 1 day, 5 minutes, and 34 seconds
......@@ -204,77 +204,32 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp
if not replicated:
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
replicate = design_df.at[0, 'replicate']
design_new_df = design_df.loc[np.repeat(design_df.index, 4)].reset_index()
pool_experiment = design_df.tag_align.unique()
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
design_new_df['replicate'] = design_new_df['replicate'].astype(str)
design_new_df.at[1, 'sample_id'] = experiment_id + '_pr1'
design_new_df.at[1, 'replicate'] = '1_pr'
design_new_df.at[1, 'xcor'] = 'Calculate'
design_new_df.at[2, 'sample_id'] = experiment_id + '_pr2'
design_new_df.at[2, 'replicate'] = '2_pr'
design_new_df.at[2, 'xcor'] = 'Calculate'
design_new_df.at[3, 'sample_id'] = experiment_id + '_pooled'
design_new_df.at[3, 'replicate'] = 'pooled'
design_new_df.at[3, 'xcor'] = 'Calculate'
design_new_df.at[3, 'tag_align'] = design_new_df.at[0, 'tag_align']
# Make 2 self psuedoreplicates
self_pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
self_pseudoreplicates_dict[rep] = self_psuedoreplication(tag_file, replicate_prefix, paired)
# Update design to include new self pseudo replicates
self_pseudoreplicates_df = pd.DataFrame.from_dict(self_pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for rep, pseudorep_file in self_pseudoreplicates_df.iterrows():
path_to_file = cwd + '/' + pseudorep_file
replicate = rep + 1
design_new_df.loc[replicate, 'tag_align'] = path_to_file
pool_pseudoreplicates_dict[replicate] = path_to_file
# Drop index column
design_new_df.drop(labels='index', axis=1, inplace=True)
else:
# Make pool of replicates
replicate_files = design_df.tag_align.unique()
experiment_id = design_df.at[0, 'experiment_id']
pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
# Make self psuedoreplicates equivalent to number of replicates
pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
pseudoreplicates_dict[rep] = pr_dict
# Merge self psuedoreplication for each true replicate
pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for index, row in pseudoreplicates_df.iterrows():
replicate_id = index + 1
pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
pool_replicate = pool(row, pr_filename, False)
pool_pseudoreplicates_dict[replicate_id] = pool_replicate
design_new_df = design_df
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
replicate_files = design_df.tag_align.unique()
pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
# Make 2 self psuedoreplicates
pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
pseudoreplicates_dict[rep] = pr_dict
# Update design to include new self pseudo replicates
pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for index, row in pseudoreplicates_df.iterrows():
replicate_id = index + 1
pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
pool_replicate = pool(row, pr_filename, False)
pool_pseudoreplicates_dict[replicate_id] = pool_replicate
design_new_df = design_df #.loc[np.repeat(design_df.index, 4)].reset_index()
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
# If paired change to single End
if paired:
......
......@@ -70,7 +70,6 @@ def test_single_rep(design_experiment_2):
shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
print(single_rep)
assert single_rep.shape[0] == 4
@pytest.mark.singleend
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment