Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Forked from BICF / Astrocyte / chipseq_analysis
789 commits behind the upstream repository.
runMemechip.py 2.15 KiB
#!/qbrc/software/Python-2.7.7/bin/python
# programmer : bbc
# usage:

import sys
import re
from re import sub
import string
import argparse as ap
import logging
#import twobitreader
import pybedtools
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
logging.basicConfig(level=10)


def prepare_argparser():
  description = "Scan potential sgRNA in given fasta"
  epilog = "For command line options of each command, type %(prog)% COMMAND -h"
  argparser = ap.ArgumentParser(description=description, epilog = epilog)
  argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BED file")
  argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output")
  argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file")
  argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N")
  argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks")
  return(argparser)

def rc():
  comps = {'A':"T",'C':"G",'G':"C",'T':"A","N":"N"}
  return ''.join([comps[x] for x in seq.upper()[::-1]])

def main():
  argparser = prepare_argparser()
  args = argparser.parse_args()
  run(args.infile, args.genome, args.limit, args.output)

def run(infile, genome, limit, output):
  infile = pybedtools.BedTool(infile)
  #genome = twobitreader.TwoBitFile(genome)
  output = open(output+".fa","w")
  rowcount = 1
  if limit ==-1:
    limit = len(infile)
  for record in infile:
    while rowcount <=limit:
      rowcount += 1
      try:
        seq = genome[record.chrom][record.start:record.stop]
      except:
        pass
      else:
        if record.strand == "-":
          seq = rc(seq)
        newfa_name = record.name#"_".join(record.fields)
        newfa = SeqRecord(Seq(seq),newfa_name,description="")
        SeqIO.write(newfa,output+".fa","fasta")
    output.close()
  #Call memechip
  meme_command = "meme-chip -oc "+output+"_memechip"+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 "+output+".fa"
  p = subprocess.Popen(meme_command, shell=True)
  p.communicate()
 
if __name__=="__main__":
  main()