#!/usr/bin/env python
# -*- coding: latin-1 -*-

'''Plot the log2 RPKM density distribution from a TSV file.'''

import argparse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

EPILOG = '''
For more details:
    %(prog)s --help
'''

# Columns of the RPKM table whose values are pooled into one distribution.
SAMPLE_COLUMNS = ('ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10')

# Offset added to every non-zero value before the log2 transform.
PSEUDOCOUNT = 0.00001


def get_args(argv=None):
    '''Parse and validate the command-line arguments.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the arguments from sys.argv.

    Returns:
        The argparse.Namespace holding rpkm, color, factor, limit and
        protein (protein is None when the option is not given).

    Exits with a usage error when a required option is missing or when
    --limit is not strictly positive.
    '''
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-r', '--rpkm',
                        help="The file with RPKM values.",
                        required=True)
    parser.add_argument('-c', '--color',
                        help="The hex color to make the graph",
                        required=True)
    parser.add_argument('-f', '--factor',
                        help="Factor that is being analyzed.",
                        required=True)
    parser.add_argument('-l', '--limit',
                        help="The RPKM limit to plot",
                        type=float,
                        required=True)
    parser.add_argument('-p', '--protein',
                        help="The file with Protein coding genes.")
    args = parser.parse_args(argv)

    # The limit is drawn at log2(limit), which is undefined for values <= 0.
    if args.limit <= 0:
        parser.error("--limit must be greater than 0 (got %s)" % args.limit)

    return args


def _pooled_values(table, columns=SAMPLE_COLUMNS):
    '''Return the values of the given columns of table as one flat array.'''
    return np.concatenate([np.asarray(table[name]) for name in columns])


def main():
    '''Read the RPKM table, plot its log2 density and save it as a PNG.

    The output file is named "<factor>_distribution.png". When a protein
    coding gene file is supplied, only rows whose gene_id appears in its
    first column are plotted.
    '''
    args = get_args()

    # "ticks" fully determines the seaborn axes style used for the figure.
    sns.set_style("ticks")

    rpkm_table = pd.read_csv(args.rpkm, sep='\t')

    if args.protein is not None:
        pc_genes = pd.read_csv(args.protein, sep='\t', header=None)
        keep = rpkm_table['gene_id'].isin(pc_genes[0].values)
        rpkm_table = rpkm_table[keep]

    pooled = _pooled_values(rpkm_table)

    # Exact zeros are excluded; the remaining values are offset by
    # PSEUDOCOUNT before taking log2.
    nonzero = pooled[pooled != 0]
    sns.kdeplot(np.log2(nonzero + PSEUDOCOUNT), color=args.color)
    sns.despine()

    # Mark the requested RPKM cutoff on the same log2 scale as the density.
    plt.axvline(np.log2(args.limit), color='black', linestyle='dashed',
                linewidth=1)
    plt.savefig(args.factor + '_distribution.png')
    plt.clf()


if __name__ == '__main__':
    main()