An error occurred while loading the file. Please try again.
-
Venkat Malladi authored87996cff
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Forked from
BICF / Astrocyte / chipseq_analysis
624 commits behind the upstream repository.
experiment_design.py 2.02 KiB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
'''Generate experiment design files for downstream processing.'''
import argparse
import logging
import pandas as pd
# Text appended after the argument list in the --help output.
EPILOG = '''
For more details:
%(prog)s --help
'''

# SETTINGS
# Module-level logger. Records at INFO and above are accepted, and they are
# not passed up to ancestor loggers. The NullHandler discards records, so
# nothing is emitted until another handler is attached.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.propagate = False
logger.addHandler(logging.NullHandler())
def get_args(argv=None):
    '''Define and parse the command-line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. With the default of None, argparse reads
            sys.argv[1:], so existing callers are unaffected.

    Returns:
        The argparse.Namespace produced by the parser; its `design`
        attribute holds the value passed to -d/--design.
    '''
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-d', '--design',
                        help="The design file to make experiments (tsv format).",
                        required=True)

    return parser.parse_args(argv)
def update_controls(design):
    '''Attach each row's control tag_align value and drop self-controlled rows.

    Args:
        design: pandas DataFrame with at least the columns 'sample_id',
            'tag_align' and 'control_id'. The frame passed in is left
            unmodified.

    Returns:
        A new DataFrame with an extra 'control_tag_align' column holding the
        'tag_align' value of the row whose 'sample_id' equals this row's
        'control_id'. Rows whose 'control_id' equals their own 'sample_id'
        are removed.

    Raises:
        KeyError: If a 'control_id' value does not appear in 'sample_id'.
    '''
    logger.info("Running control file update.")

    # sample_id -> tag_align lookup; if a sample_id is repeated, the last
    # row wins.
    tag_align_by_sample = dict(zip(design['sample_id'], design['tag_align']))

    # Report every unknown control at once instead of failing on the first.
    unknown = set(design['control_id']) - set(tag_align_by_sample)
    if unknown:
        raise KeyError(
            "control_id value(s) not found in sample_id: "
            + ", ".join(sorted(str(value) for value in unknown)))

    # Work on a copy so the caller's DataFrame does not gain a column as a
    # side effect of this call.
    design = design.copy()
    design['control_tag_align'] = [
        tag_align_by_sample[control] for control in design['control_id']]

    logger.info("Removing rows that are their own control.")
    design = design[design['control_id'] != design['sample_id']]

    return design
def make_experiment_design(design):
    '''Write one tab-separated design file per experiment.

    Rows are grouped by the 'experiment_id' column and each group is written,
    with a header row and without the index, to '<experiment_id>.tsv'
    relative to the current working directory.

    Args:
        design: pandas DataFrame containing an 'experiment_id' column.
    '''
    logger.info("Running experiment design generation.")

    for experiment, df_experiment in design.groupby('experiment_id'):
        # str() because the group key is not a string when the ids were
        # parsed as numbers, and number + '.tsv' raises TypeError.
        experiment_file = str(experiment) + '.tsv'
        df_experiment.to_csv(experiment_file, header=True, sep='\t', index=False)
def main():
    '''Parse arguments, read the design table and write the per-experiment files.'''
    cli_args = get_args()

    # Send this module's log records to a file in the working directory
    log_handler = logging.FileHandler('experiment_generation.log')
    logger.addHandler(log_handler)

    # Load the tab-separated design table named on the command line
    design_table = pd.read_csv(cli_args.design, sep='\t')

    # Add the control columns, then write one design file per experiment
    make_experiment_design(update_controls(design_table))


if __name__ == '__main__':
    main()