spike_in_correlation.py 1.69 KB
Newer Older
Venkat Malladi's avatar
Venkat Malladi committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
#!/usr/bin/env python

# -*- coding: latin-1 -*-
'''Take a Counts file and plot Spikein'''

# Epilog text passed to argparse.ArgumentParser in get_args(); argparse
# replaces %(prog)s with the program name when the help is printed.
EPILOG = '''
For more details:
        %(prog)s --help
'''


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
import argparse

def get_args():
    '''Parse the command line and return the populated namespace.

    Two options are defined, both required:
      -c / --counts   the file with counts values
      -f / --factor   the factor that is being analyzed

    The module docstring is used as the description and EPILOG as the
    epilog; the raw-description formatter keeps their line breaks.
    '''
    # (short flag, long flag, help text) for every required option.
    required_options = (
        ('-c', '--counts', "The file with counts values."),
        ('-f', '--factor', "Factor that is being analyzed."),
    )

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        epilog=EPILOG,
    )
    for short_flag, long_flag, help_text in required_options:
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)

    return cli.parse_args()

def main():
    '''Plot log2 spike-in counts against log2 Mix1 concentrations.

    Reads 'ERCC_spikein/spike_in_concentrations.tsv' and the counts file
    given by --counts (both tab-separated), joins them on 'ERCC ID', fits
    a linear regression of log2(Counts + 1) on log2(Mix1) and writes a
    scatter plot with the fitted line to 'figures/<factor>_counts.png'.
    The legend reports the Pearson correlation between the two log2 series.

    Raises:
        ValueError: if fewer than two rows remain after the merge, since
            neither a fit nor a correlation is meaningful then.
    '''
    args = get_args()
    spike_in = pd.read_csv('ERCC_spikein/spike_in_concentrations.tsv', sep='\t')
    counts = pd.read_csv(args.counts, sep='\t')

    spike_in_counts = pd.merge(counts, spike_in, on='ERCC ID')
    # Positional rename to fixed column names.
    # NOTE(review): this assumes the merged frame has exactly eight columns
    # in this order (ID, one counts column, six concentration-table
    # columns) -- confirm against the input files.
    spike_in_counts.columns = ['ERCC ID', 'Counts', 'Re-sort ID', 'subgroup',
                               'Mix1', 'Mix2', 'fold-change', 'mix1/mix2']

    n_rows = len(spike_in_counts)
    if n_rows < 2:
        raise ValueError(
            "Need at least two spike-ins shared between the counts file and "
            "the concentration table, found %d." % n_rows)

    test = pd.DataFrame()
    # +1 keeps zero counts finite under log2.
    test['Counts'] = np.log2(spike_in_counts['Counts'] + 1)
    test['Mix1'] = np.log2(spike_in_counts['Mix1'])

    # scikit-learn expects 2-D input; -1 lets numpy infer the row count so
    # the script works for any number of merged spike-ins.
    x = test['Mix1'].values.reshape(-1, 1)
    y = test['Counts'].values.reshape(-1, 1)
    regr = linear_model.LinearRegression()
    regr.fit(x, y)

    # Pearson r rather than regr.coef_: the coefficient is the slope of the
    # fitted line, which is not the correlation the legend advertises.
    cor = np.corrcoef(x.ravel(), y.ravel())[0, 1]

    test.plot.scatter(x='Mix1', y='Counts')
    plt.plot(x, regr.predict(x), color='blue', linewidth=3,
             label="Correlation r = %.4f" % cor)
    plt.legend(loc=2)
    plt.xlabel('log2(Mix1)')
    plt.ylabel('log2(Counts+1)')
    plt.savefig('figures/' + args.factor + '_counts.png')
    plt.clf()


# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()