diff --git a/workflow/scripts/runMemechip.py b/workflow/scripts/runMemechip.py new file mode 100644 index 0000000000000000000000000000000000000000..d0f66f60abd60aa2bb2ca64af270b7e2bf82b97d --- /dev/null +++ b/workflow/scripts/runMemechip.py @@ -0,0 +1,64 @@ +#!/qbrc/software/Python-2.7.7/bin/python +# programmer : bbc +# usage: + +import sys +import re +from re import sub +import string +import argparse as ap +import logging +#import twobitreader +import pybedtools +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +logging.basicConfig(level=10) + + +def prepare_argparser(): + description = "Scan potential sgRNA in given fasta" + epilog = "For command line options of each command, type %(prog)% COMMAND -h" + argparser = ap.ArgumentParser(description=description, epilog = epilog) + argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BED file") + argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output") + argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file") + argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N") + argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks") + return(argparser) + +def rc(): + comps = {'A':"T",'C':"G",'G':"C",'T':"A","N":"N"} + return ''.join([comps[x] for x in seq.upper()[::-1]]) + +def main(): + argparser = prepare_argparser() + args = argparser.parse_args() + run(args.infile, args.genome, args.limit, args.output) + +def run(infile, genome, limit, output) + infile = pybedtools.BedTool(infile) + #genome = twobitreader.TwoBitFile(genome) + output = open(args.outfile,"w") + rowcount = 1 + for record in infile: + while rowcount <=limit: + rowcount += 1 + try: + seq = genome[record.chrom][record.start:record.stop] + except: + pass + else: + if record.strand == "-": + seq = rc(seq) + newfa_name = record.name#"_".join(record.fields) + newfa = SeqRecord(Seq(seq),newfa_name,description="") + SeqIO.write(newfa,output+".fa","fasta") + output.close() + #Call memechip + meme_command = "meme-chip -oc "+output+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10" + p = subprocess.Popen(meme_command, shell=True) + p.communicate() + +if __name__=="__main__": + main()