Skip to content
Snippets Groups Projects
runDeepTools.py 3.5 KiB
Newer Older
#!/usr/bin/python
# programmer : bbc
# usage:

import sys
import argparse as ap
import logging
import subprocess
import pandas as pd
from multiprocessing import Pool
logging.basicConfig(level=10)


def prepare_argparser():
  description = "Make wig file for given bed using bam"
  epilog = "For command line options of each command, type %(prog)% COMMAND -h"
  argparser = ap.ArgumentParser(description=description, epilog = epilog)
  argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BAM file")
  argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19")
  #argparser.add_argument("-b","--bed",dest="bedfile",type=str,required=True, help = "Gene locus in bed format")
  #argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"])
  #argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header")
  return(argparser)

def run_qc(files, controls, labels):
  mbs_command = "multiBamSummary bins --bamfiles "+' '.join(files)+" -out sample_mbs.npz"
  p = subprocess.Popen(mbs_command, shell=True)
  #logging.debug(mbs_command)
  p.communicate()
  pcor_command = "plotCorrelation -in sample_mbs.npz --corMethod spearman --skipZeros --plotTitle \"Spearman Correlation of Read Counts\" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers  -o experiment.deeptools.heatmap_spearmanCorr_readCounts_v2.png --labels "+" ".join(labels)
  #logging.debug(pcor_command)
  p = subprocess.Popen(pcor_command, shell=True)
  p.communicate()
  #plotCoverage
  pcov_command = "plotCoverage -b "+" ".join(files)+" --plotFile experiment.deeptools_coverage.png -n 1000000 --plotTitle \"sample coverage\" --ignoreDuplicates --minMappingQuality 10"
  p = subprocess.Popen(pcov_command, shell=True)
  p.communicate()
  #draw fingerprints plots
  for treat,ctrl,name in zip(files,controls,labels):
    fp_command = "plotFingerprint -b "+treat+" "+ctrl+" --labels "+name+" control --plotFile "+name+".deeptools_fingerprints.png"
    p = subprocess.Popen(fp_command, shell=True)
    p.communicate()

def bam2bw_wrapper(command):
  p = subprocess.Popen(command, shell=True)
  p.communicate()

def run_signal(files, labels, genome):
  #compute matrix and draw profile and heatmap
  gene_bed = genome+"/gene.bed"#"/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/"+genome+"/gene.bed"
  bw_commands = []
  for f in files:
    bw_commands.append("bamCoverage -bs 10 -b "+f+" -o "+f.replace("bam","bw"))
  work_pool = Pool(min(len(files), 12))
  work_pool.map(bam2bw_wrapper, bw_commands)
  work_pool.close()
  work_pool.join()

  cm_command = "computeMatrix scale-regions -R "+gene_bed+" -a 3000 -b 3000 --regionBodyLength 5000 --skipZeros -S *.bw -o samples.deeptools_generegionscalematrix.gz"
  p = subprocess.Popen(cm_command, shell=True)
  p.communicate()
  hm_command = "plotHeatmap -m samples.deeptools_generegionscalematrix.gz -out samples.deeptools_readsHeatmap.png"
  p = subprocess.Popen(hm_command, shell=True)
  p.communicate()

def run(dfile,genome):
  #parse dfile, suppose data files are the same folder as design file
  dfile = pd.read_csv(dfile)
  #QC: multiBamSummary and plotCorrelation
  run_qc(dfile['bamReads'], dfile['bamControl'], dfile['SampleID'])
  #signal plots
  run_signal(dfile['bamReads'],dfile['SampleID'],genome)

def main():
  argparser = prepare_argparser()
  args = argparser.parse_args()
  run(args.infile, args.genome)


if __name__=="__main__":
  main()