Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Forked from Venkat Malladi / TFSEE
2 commits ahead of the upstream repository.
fpkm.py 2.38 KiB
#!/usr/bin/env python

# -*- coding: latin-1 -*-
'''Compute the mean and standard deviation of FPKM across replicates from a list of RSEM outputs'''

# Text passed to ArgumentParser as ``epilog`` in get_args(); argparse
# substitutes %(prog)s with the program name when rendering the help.
EPILOG = '''
For more details:
        %(prog)s --help
'''

import numpy as np
import pandas as pd
import argparse
import csv


def get_args():
    '''Parse the command line and return the resulting namespace.

    Two options are defined, both mandatory:

    * ``-e`` / ``--experiments``: path to the experiments file.
    * ``-f`` / ``--factor``: the factor being analyzed.

    The module docstring is used as the description and EPILOG as the
    epilog; the raw formatter keeps their line breaks intact.
    '''
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        epilog=EPILOG,
    )

    # (short flag, long flag, help text) for every required option.
    required_options = (
        ('-e', '--experiments',
         "Comma separated file of experiment name followed by file location."),
        ('-f', '--factor',
         "Factor that is being analyzed."),
    )
    for short_flag, long_flag, help_text in required_options:
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)

    return cli.parse_args()



def _replicate_stats(experiment, rep1_path, rep2_path):
    '''Return per-gene (mean, std) FPKM Series for one experiment.

    Both paths are read as tab-separated tables that must provide
    ``gene_id`` and ``FPKM`` columns. The two replicates are aligned on
    ``gene_id``; a gene present in only one replicate keeps that single
    value as its mean and gets NaN as its standard deviation.

    Raises:
        KeyError: if a table lacks the ``gene_id`` or ``FPKM`` column.
    '''
    replicates = []
    for label, path in (('Rep1_FPKM', rep1_path), ('Rep2_FPKM', rep2_path)):
        table = pd.read_csv(path, sep='\t')
        fpkm = table.set_index('gene_id')['FPKM']
        fpkm.name = label
        replicates.append(fpkm)

    paired = pd.concat(replicates, axis=1)
    fpkm_mean = paired.mean(axis=1)
    fpkm_mean.name = experiment
    fpkm_std = paired.std(axis=1)
    fpkm_std.name = experiment
    return fpkm_mean, fpkm_std


def _combine(series_list, names):
    '''Join per-experiment Series into one table with one column per name.'''
    if not series_list:
        # No experiments listed: keep writing an empty table.
        return pd.DataFrame()
    table = pd.concat(series_list, axis=1)
    # Assign labels explicitly so a single experiment is labelled too.
    table.columns = names
    return table


def main():
    '''Average replicate FPKM values per experiment and write two TSVs.

    Reads the experiments CSV named by ``--experiments`` (columns
    ``experiment``, ``file1``, ``file2``), computes the per-gene mean and
    standard deviation of FPKM over the two replicate files of every
    experiment, and writes ``<factor>.tsv`` (means) and
    ``<factor>_err.tsv`` (standard deviations), indexed by ``gene_id``
    with one column per experiment.
    '''
    args = get_args()

    fpkm_columns = []
    means = []
    errors = []
    # The context manager closes the experiments file even on failure.
    with open(args.experiments) as handle:
        for exp in csv.DictReader(handle):
            experiment = exp['experiment']
            fpkm_mean, fpkm_std = _replicate_stats(
                experiment, exp['file1'], exp['file2'])
            fpkm_columns.append(experiment)
            means.append(fpkm_mean)
            errors.append(fpkm_std)

    # Concatenate once at the end instead of rebuilding the table per
    # experiment.
    fpkm_all = _combine(means, fpkm_columns)
    fpkm_err = _combine(errors, fpkm_columns)

    fpkm_all.to_csv(args.factor + ".tsv", index_label="gene_id", sep='\t')
    fpkm_err.to_csv(args.factor + "_err.tsv", index_label="gene_id", sep='\t')


# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()