diff --git a/workflow/scripts/runDeepTools.py b/workflow/scripts/runDeepTools.py new file mode 100644 index 0000000000000000000000000000000000000000..800544dc87c89329bf050369367e0cba9b805f28 --- /dev/null +++ b/workflow/scripts/runDeepTools.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +# programmer : bbc +# usage: + +import sys +import argparse as ap +import logging +import subprocess +import pandas as pd +from multiprocessing import Pool +logging.basicConfig(level=10) + + +def prepare_argparser(): + description = "Make wig file for given bed using bam" + epilog = "For command line options of each command, type %(prog)% COMMAND -h" + argparser = ap.ArgumentParser(description=description, epilog = epilog) + argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BAM file") + argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19") + #argparser.add_argument("-b","--bed",dest="bedfile",type=str,required=True, help = "Gene locus in bed format") + #argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"]) + #argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header") + return(argparser) + +def run_qc(files, controls, labels): + mbs_command = "multiBamSummary bins --bamfiles "+' '.join(files)+" -out sample_mbs.npz" + p = subprocess.Popen(mbs_command, shell=True) + #logging.debug(mbs_command) + p.communicate() + pcor_command = "plotCorrelation -in sample_mbs.npz --corMethod spearman --skipZeros --plotTitle \"Spearman Correlation of Read Counts\" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers -o experiment.deeptools.heatmap_spearmanCorr_readCounts_v2.png --labels "+" ".join(labels) + #logging.debug(pcor_command) + p = subprocess.Popen(pcor_command, shell=True) + p.communicate() + #plotCoverage + pcov_command = "plotCoverage -b "+" ".join(files)+" --plotFile experiment.deeptools_coverage.png -n 1000000 --plotTitle \"sample coverage\" --ignoreDuplicates --minMappingQuality 10" + p = subprocess.Popen(pcov_command, shell=True) + p.communicate() + #draw fingerprints plots + for treat,ctrl,name in zip(files,controls,labels): + fp_command = "plotFingerprint -b "+treat+" "+ctrl+" --labels "+name+" control --plotFile "+name+".deeptools_fingerprints.png" + p = subprocess.Popen(fp_command, shell=True) + p.communicate() + +def bam2bw_wrapper(command): + p = subprocess.Popen(command, shell=True) + p.communicate() + +def run_signal(files, labels, genome): + #compute matrix and draw profile and heatmap + gene_bed = genome+"/gene.bed"#"/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/"+genome+"/gene.bed" + bw_commands = [] + for f in files: + bw_commands.append("bamCoverage -bs 10 -b "+f+" -o "+f.replace("bam","bw")) + work_pool = Pool(min(len(files), 12)) + work_pool.map(bam2bw_wrapper, bw_commands) + work_pool.close() + work_pool.join() + + cm_command = "computeMatrix scale-regions -R "+gene_bed+" -a 3000 -b 3000 --regionBodyLength 5000 --skipZeros -S *.bw -o samples.deeptools_generegionscalematrix.gz" + p = subprocess.Popen(cm_command, shell=True) + p.communicate() + hm_command = "plotHeatmap -m samples.deeptools_generegionscalematrix.gz -out samples.deeptools_readsHeatmap.png" + p = subprocess.Popen(hm_command, shell=True) + p.communicate() + +def run(dfile,genome): + #parse dfile, suppose data files are the same folder as design file + dfile = pd.read_csv(dfile) + #QC: multiBamSummary and plotCorrelation + run_qc(dfile['bamReads'], dfile['bamControl'], dfile['SampleID']) + #signal plots + run_signal(dfile['bamReads'],dfile['SampleID'],genome) + +def main(): + argparser = prepare_argparser() + args = argparser.parse_args() + run(args.infile, args.genome) + + +if __name__=="__main__": + main()