Skip to content
Snippets Groups Projects
runMemechip.py 2.82 KiB
Newer Older
Beibei Chen's avatar
Beibei Chen committed
#!/qbrc/software/Python-2.7.7/bin/python
# programmer : bbc
# usage:

import sys
import re
from re import sub
import string
import argparse as ap
import logging
Beibei Chen's avatar
Beibei Chen committed
import twobitreader
import subprocess
Beibei Chen's avatar
Beibei Chen committed
import pandas as pd
Beibei Chen's avatar
Beibei Chen committed
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
Beibei Chen's avatar
Beibei Chen committed
from multiprocessing import Pool
Beibei Chen's avatar
Beibei Chen committed
logging.basicConfig(level=10)


def prepare_argparser():
Beibei Chen's avatar
Beibei Chen committed
  description = "Run memechip command"
Beibei Chen's avatar
Beibei Chen committed
  epilog = "For command line options of each command, type %(prog)% COMMAND -h"
  argparser = ap.ArgumentParser(description=description, epilog = epilog)
  argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BED file")
Beibei Chen's avatar
Beibei Chen committed
#  argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output")
Beibei Chen's avatar
Beibei Chen committed
  argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file")
Beibei Chen's avatar
Beibei Chen committed
 # argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N")
Beibei Chen's avatar
Beibei Chen committed
  argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks")
  return(argparser)

def rc():
  comps = {'A':"T",'C':"G",'G':"C",'T':"A","N":"N"}
  return ''.join([comps[x] for x in seq.upper()[::-1]])

def main():
  argparser = prepare_argparser()
  args = argparser.parse_args()
Beibei Chen's avatar
Beibei Chen committed
  #run(args.infile, args.genome, args.limit, args.output)
  #get Pool ready
  dfile = pd.read_csv(args.infile)
Beibei Chen's avatar
Beibei Chen committed
  meme_arglist =  zip(dfile['Peaks'].tolist(),[args.genome+"/genome.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist())  
Beibei Chen's avatar
Beibei Chen committed
  work_pool = Pool(min(12,dfile.shape[0]))
  resultList =  work_pool.map(run_wrapper, meme_arglist)
  work_pool.close()
  work_pool.join()


def run_wrapper(args):
  run(*args)

Beibei Chen's avatar
Beibei Chen committed

def run(infile, genome, limit, output):
Beibei Chen's avatar
Beibei Chen committed
  infile = pybedtools.BedTool(infile)
Beibei Chen's avatar
Beibei Chen committed
  logging.debug(len(infile))
Beibei Chen's avatar
Beibei Chen committed
  genome = twobitreader.TwoBitFile(genome)
  outfile = open(output+".fa","w")
Beibei Chen's avatar
Beibei Chen committed
  rowcount = 0
Beibei Chen's avatar
Beibei Chen committed
  limit = int(limit)
Beibei Chen's avatar
Beibei Chen committed
  logging.debug(limit)
  if limit ==-1:
    limit = len(infile)
Beibei Chen's avatar
Beibei Chen committed
  for record in infile:
Beibei Chen's avatar
Beibei Chen committed
    rowcount += 1   
    #logging.debug(record) 
Beibei Chen's avatar
Beibei Chen committed
    if rowcount <=limit:
Beibei Chen's avatar
Beibei Chen committed
      #logging.debug(rowcount)
Beibei Chen's avatar
Beibei Chen committed
      try:
Beibei Chen's avatar
Beibei Chen committed
        #logging.debug(record.chrom)
Beibei Chen's avatar
Beibei Chen committed
        seq = genome[record.chrom][record.start:record.stop]
      except:
        pass
      else:
        if record.strand == "-":
          seq = rc(seq)
Beibei Chen's avatar
Beibei Chen committed
        if len(record.fields)>=4:
          newfa_name = record.name
        else:
          newfa_name = "_".join(record.fields)
Beibei Chen's avatar
Beibei Chen committed
        newfa = SeqRecord(Seq(seq),newfa_name,description="")
Beibei Chen's avatar
Beibei Chen committed
        #logging.debug(seq)
Beibei Chen's avatar
Beibei Chen committed
        SeqIO.write(newfa,outfile,"fasta")
  outfile.close()
Beibei Chen's avatar
Beibei Chen committed
  #Call memechip
  meme_command = "meme-chip -oc "+output+"_memechip"+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 "+output+".fa"
Beibei Chen's avatar
Beibei Chen committed
  p = subprocess.Popen(meme_command, shell=True)
  p.communicate()
 
if __name__=="__main__":
  main()