-
Venkat Malladi authoredac7a8825
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Forked from
Venkat Malladi / TFSEE
2 commits ahead of the upstream repository.
fpkm.py 2.38 KiB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
# -*- coding: latin-1 -*-
'''Take the average FPKM (and its standard deviation) across replicates from a list of RSEM outputs.'''
# Text passed as the ``epilog`` of the argparse parser built in get_args().
EPILOG = '''
For more details:
%(prog)s --help
'''
import numpy as np
import pandas as pd
import argparse
import csv
def get_args():
    '''Build the command-line parser and return the parsed arguments.

    Two options are defined, both mandatory:
      -e / --experiments  comma separated file of experiment name followed
                          by file location
      -f / --factor       factor that is being analyzed

    The module docstring is used as the description and EPILOG as the epilog.
    '''
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        epilog=EPILOG,
    )

    # (flags, help text) for every option; all of them are required.
    option_specs = (
        (('-e', '--experiments'),
         "Comma separated file of experiment name followed by file location."),
        (('-f', '--factor'),
         "Factor that is being analyzed."),
    )
    for flags, help_text in option_specs:
        cli.add_argument(*flags, help=help_text, required=True)

    return cli.parse_args()
def _load_fpkm(path):
    '''Return the FPKM column of a tab-separated file as a Series keyed by gene_id.

    Raises KeyError if the file lacks a ``gene_id`` or ``FPKM`` column.
    '''
    table = pd.read_csv(path, sep='\t')
    return pd.Series(table['FPKM'].values, index=table['gene_id'].values)


def _replicate_stats(file1, file2):
    '''Return per-gene (mean, std) Series across the two replicate files.'''
    replicates = pd.concat([_load_fpkm(file1), _load_fpkm(file2)], axis=1)
    replicates.columns = ['Rep1_FPKM', 'Rep2_FPKM']
    return replicates.mean(axis=1), replicates.std(axis=1)


def _as_table(columns, names):
    '''Join per-experiment Series side by side into one DataFrame.'''
    if not columns:
        # No experiments listed: keep an empty table so the output files
        # are still written.
        return pd.DataFrame()
    table = pd.concat(columns, axis=1)
    # Always a DataFrame here (even for a single experiment), so the
    # experiment names really end up in the header.
    table.columns = names
    return table


def main():
    '''Average replicate FPKM values per experiment and write summary tables.

    The file given by ``--experiments`` is read as a CSV with the columns
    ``experiment``, ``file1`` and ``file2``; ``file1`` and ``file2`` are
    tab-separated tables containing ``gene_id`` and ``FPKM`` columns.
    For every experiment the per-gene mean and standard deviation of the two
    replicates are computed, then two tab-separated files are written:

      <factor>.tsv      mean FPKM, one column per experiment
      <factor>_err.tsv  standard deviation, one column per experiment

    Raises KeyError if the experiments file or a replicate file is missing
    one of the expected columns.
    '''
    args = get_args()

    fpkm_columns = []
    fpkm_means = []
    fpkm_stds = []

    # The context manager guarantees the experiments file is closed.
    with open(args.experiments) as handle:
        # Loop through all the files
        for exp in csv.DictReader(handle):
            experiment = exp['experiment']
            fpkm_mean, fpkm_std = _replicate_stats(exp['file1'], exp['file2'])

            # mean()/std() along axis=1 return Series; a Series is labelled
            # through .name (it has no .columns).
            fpkm_mean.name = experiment
            fpkm_std.name = experiment

            fpkm_columns.append(experiment)
            fpkm_means.append(fpkm_mean)
            fpkm_stds.append(fpkm_std)

    # Concatenate once instead of re-copying the growing table per experiment.
    fpkm_all = _as_table(fpkm_means, fpkm_columns)
    fpkm_err = _as_table(fpkm_stds, fpkm_columns)

    fpkm_all.to_csv(args.factor + ".tsv", index_label="gene_id", sep='\t')
    fpkm_err.to_csv(args.factor + "_err.tsv", index_label="gene_id", sep='\t')
if __name__ == '__main__':
main()