Newer
Older
#!/usr/bin/env python
# -*- coding: latin-1 -*-
'''Take a TSV file and make a plot graph.'''
# Text shown after the option list in the --help output; get_args() passes it
# to argparse.ArgumentParser as ``epilog``. ``%(prog)s`` is the argparse format
# specifier for the program name.
EPILOG = '''
For more details:
%(prog)s --help
'''
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import argparse
import seaborn as sns
def get_args():
    '''Parse the command-line options and return them.

    Options:
        -r/--rpkm     file with RPKM values (required)
        -c/--color    hex color used for the graph (required)
        -f/--factor   factor being analyzed (required)
        -l/--limit    RPKM limit to plot, parsed as a float (required)
        -p/--protein  file with protein coding genes (optional)

    Returns:
        argparse.Namespace with attributes ``rpkm``, ``color``, ``factor``,
        ``limit`` and ``protein`` (``protein`` is None when not given).
    '''
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-r', '--rpkm',
                        help="The file with RPKM values.",
                        required=True)
    parser.add_argument('-c', '--color',
                        help="The hex color to make the graph",
                        required=True)
    parser.add_argument('-f', '--factor',
                        help="Factor that is being analyzed.",
                        required=True)
    # The limit is fed to np.log2() by main(), so it must be numeric rather
    # than the raw command-line string; main() also uses it unconditionally,
    # hence required.
    parser.add_argument('-l', '--limit',
                        help="The RPKM limit to plot",
                        type=float,
                        required=True)
    parser.add_argument('-p', '--protein',
                        help="The file with Protein coding genes.")
    return parser.parse_args()
def main():
    '''Plot the density of log2 RPKM values pooled over the ES time points.

    Reads the tab-separated table given by --rpkm. When --protein is given,
    only rows whose ``gene_id`` occurs in the first column of that file are
    kept. The ES_D0, ES_D2, ES_D5, ES_D7 and ES_D10 columns are pooled,
    zeros are dropped, and a kernel density estimate of the log2 values is
    drawn in the --color colour with a dashed vertical line at log2(limit).
    The figure is saved as ``<factor>_distribution.png``.
    '''
    sns.set_style("white")
    sns.set_style("ticks")
    args = get_args()

    rpkm_table = pd.read_csv(args.rpkm, sep='\t')
    if args.protein is not None:
        pc_genes = pd.read_csv(args.protein, sep='\t', header=None)
        keep = rpkm_table['gene_id'].isin(pc_genes[0].values)
        filtered_rpkm = rpkm_table[keep]
    else:
        filtered_rpkm = rpkm_table

    # Pool every time-point column into one flat array with a single
    # concatenation instead of re-copying the array for each column.
    timepoints = ('ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10')
    locations = np.concatenate(
        [np.asarray(filtered_rpkm[column]) for column in timepoints]
    )

    # Zeros are excluded before the small offset is added and log2 is taken.
    expressed = locations[locations != 0]
    sns.kdeplot(np.log2(expressed + 0.00001), color=args.color)
    sns.despine()

    # argparse hands over a string unless the option declares a type, and
    # np.log2 cannot take a string, so coerce here; skip the marker line
    # entirely when no limit was supplied.
    if args.limit is not None:
        plt.axvline(np.log2(float(args.limit)), color='black',
                    linestyle='dashed', linewidth=1)

    plt.savefig(args.factor + '_distribution.png')
    plt.clf()
# Run the plotting routine only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()