Skip to content
Snippets Groups Projects
Commit a9bfa737 authored by Jeremy Mathews's avatar Jeremy Mathews
Browse files

Final Revision: Remove If-Else Statement (Redundant)

parent 596e145d
1 merge request!54Resolve "Single control or experiment fails at call peaks macs"
Pipeline #4537 passed with stages
in 1 day, 5 minutes, and 34 seconds
...@@ -204,77 +204,32 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con ...@@ -204,77 +204,32 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control") pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp pool_control = pool_control_tmp
if not replicated: # Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data replicate_files = design_df.tag_align.unique()
experiment_id = design_df.at[0, 'experiment_id'] pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
replicate = design_df.at[0, 'replicate']
design_new_df = design_df.loc[np.repeat(design_df.index, 4)].reset_index() # Make 2 self psuedoreplicates
pool_experiment = design_df.tag_align.unique() pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
# Update tagAlign with single end data replicate_prefix = experiment_id + '_' + str(rep)
if paired: pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
design_new_df['tag_align'] = design_new_df['se_tag_align'] pseudoreplicates_dict[rep] = pr_dict
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
# Update design to include new self pseudo replicates
design_new_df['replicate'] = design_new_df['replicate'].astype(str) pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
design_new_df.at[1, 'sample_id'] = experiment_id + '_pr1' pool_pseudoreplicates_dict = {}
design_new_df.at[1, 'replicate'] = '1_pr' for index, row in pseudoreplicates_df.iterrows():
design_new_df.at[1, 'xcor'] = 'Calculate' replicate_id = index + 1
design_new_df.at[2, 'sample_id'] = experiment_id + '_pr2' pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
design_new_df.at[2, 'replicate'] = '2_pr' pool_replicate = pool(row, pr_filename, False)
design_new_df.at[2, 'xcor'] = 'Calculate' pool_pseudoreplicates_dict[replicate_id] = pool_replicate
design_new_df.at[3, 'sample_id'] = experiment_id + '_pooled'
design_new_df.at[3, 'replicate'] = 'pooled' design_new_df = design_df #.loc[np.repeat(design_df.index, 4)].reset_index()
design_new_df.at[3, 'xcor'] = 'Calculate' # Update tagAlign with single end data
design_new_df.at[3, 'tag_align'] = design_new_df.at[0, 'tag_align'] if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
# Make 2 self psuedoreplicates design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
self_pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
self_pseudoreplicates_dict[rep] = self_psuedoreplication(tag_file, replicate_prefix, paired)
# Update design to include new self pseudo replicates
self_pseudoreplicates_df = pd.DataFrame.from_dict(self_pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for rep, pseudorep_file in self_pseudoreplicates_df.iterrows():
path_to_file = cwd + '/' + pseudorep_file
replicate = rep + 1
design_new_df.loc[replicate, 'tag_align'] = path_to_file
pool_pseudoreplicates_dict[replicate] = path_to_file
# Drop index column
design_new_df.drop(labels='index', axis=1, inplace=True)
else:
# Make pool of replicates
replicate_files = design_df.tag_align.unique()
experiment_id = design_df.at[0, 'experiment_id']
pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
# Make self psuedoreplicates equivalent to number of replicates
pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
pseudoreplicates_dict[rep] = pr_dict
# Merge self psuedoreplication for each true replicate
pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for index, row in pseudoreplicates_df.iterrows():
replicate_id = index + 1
pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
pool_replicate = pool(row, pr_filename, False)
pool_pseudoreplicates_dict[replicate_id] = pool_replicate
design_new_df = design_df
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
# If paired change to single End # If paired change to single End
if paired: if paired:
......
...@@ -70,7 +70,6 @@ def test_single_rep(design_experiment_2): ...@@ -70,7 +70,6 @@ def test_single_rep(design_experiment_2):
shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd) shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd) shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1) single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
print(single_rep)
assert single_rep.shape[0] == 4 assert single_rep.shape[0] == 4
@pytest.mark.singleend @pytest.mark.singleend
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment