diff --git a/H3K27ac_distribution.png b/H3K27ac_distribution.png
new file mode 100644
index 0000000000000000000000000000000000000000..b98db091c11c8c082fc22ff5933da5be57a29e19
Binary files /dev/null and b/H3K27ac_distribution.png differ
diff --git a/H3K4me3_distribution.png b/H3K4me3_distribution.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ac03853fd8fca931b5a86c8f1386c7655b8726e
Binary files /dev/null and b/H3K4me3_distribution.png differ
diff --git a/cutoff_analysis.py b/cutoff_analysis.py
new file mode 100755
index 0000000000000000000000000000000000000000..fe65ead8cdb43161cce943369c06677cf128c3fb
--- /dev/null
+++ b/cutoff_analysis.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: latin-1 -*-
+'''Take a TSV file and make a plot graph'''
+
+EPILOG = '''
+For more details:
+        %(prog)s --help
+'''
+
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+import argparse
+import seaborn as sns
+
+# Columns of the RPKM table whose values are pooled into one distribution.
+SAMPLE_COLUMNS = ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']
+
+
+def get_args():
+    '''Parse and validate the command line arguments.
+
+    Returns the argparse namespace with rpkm (path of the TSV file), color
+    (plot color), factor (prefix of the output file name) and limit (RPKM
+    cutoff, must be greater than 0).
+    '''
+    parser = argparse.ArgumentParser(
+        description=__doc__, epilog=EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument('-r', '--rpkm',
+                        help="The file with RPKM values.",
+                        required=True)
+    parser.add_argument('-c', '--color',
+                        help="The hex color to make the graph",
+                        required=True)
+    parser.add_argument('-f', '--factor',
+                        help="Factor that is being analyzed.",
+                        required=True)
+    parser.add_argument('-l', '--limit',
+                        help="The RPKM limit to plot",
+                        type=float,
+                        required=True)
+    args = parser.parse_args()
+    # The cutoff line is drawn at log2(limit), undefined for limit <= 0.
+    if args.limit <= 0:
+        parser.error('--limit must be greater than 0')
+    return args
+
+
+def main():
+    '''Plot the log2 density of the pooled RPKM values with the cutoff.
+
+    The figure is saved as <factor>_distribution.png.
+    '''
+    sns.set_style("white")
+    sns.set_style("ticks")
+
+    args = get_args()
+    rpkm_file = pd.read_csv(args.rpkm, sep='\t')
+
+    # Pool every sample column into a single flat array.
+    locations = np.concatenate(
+        [np.asarray(rpkm_file[column]) for column in SAMPLE_COLUMNS])
+
+    # Exclude zero values, then log2-transform with a small offset.
+    nonzero = locations[locations != 0]
+    sns.kdeplot(np.log2(nonzero + 0.00001), color=args.color)
+    sns.despine()
+    plt.axvline(np.log2(args.limit), color='black', linestyle='dashed',
+                linewidth=1)
+    plt.savefig(args.factor + '_distribution.png')
+    plt.clf()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/h3k27ac_processing.sh b/h3k27ac_processing.sh
index 589f79428db296d2cec9604908a02d4b9610e4aa..8d8e5b8078c4867767271f7c1d77ca22c6af7d70 100644
--- a/h3k27ac_processing.sh
+++ b/h3k27ac_processing.sh
@@ -42,3 +42,9 @@ bedtools intersect -a /Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT
 
 # Get RPKM
 ./rpkm.py --peaks universe_enhancer_H3K27ac.bed --experiments h3k27ac_list.csv -f H3K27ac
+
+./rpkm.py --peaks universe_enhancer_H3K27ac.bed --experiments h3k27ac_list.csv -f H3K27ac_all --minimum 0
+
+
+# Graph cutoffs
+./cutoff_analysis.py --rpkm H3K27ac_all_filtered_peaks.tsv --color '#008D14' --factor H3K27ac -l 1
diff --git a/h3k4me3_processing.sh b/h3k4me3_processing.sh
index 8d9ee3d7045d00da5c3f8a51232ac9490da96307..181c4e1668150035f484ea2afa755d88623e9642 100644
--- a/h3k4me3_processing.sh
+++ b/h3k4me3_processing.sh
@@ -35,3 +35,7 @@ rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe
 
 # Get RPKM
 ./rpkm.py --peaks /Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k4me3/universe_peaks.merge.bed --experiments h3k4me3_list.csv -f H3K4me3
+./rpkm.py --peaks /Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k4me3/universe_peaks.merge.bed --experiments h3k4me3_list.csv -f H3K4me3_all --minimum 0
+
+# Graph cutoffs
+./cutoff_analysis.py --rpkm H3K4me3_all_filtered_peaks.tsv --color '#FF7C21' --factor H3K4me3 -l 1