From 965294b4da98cd45728330cf99293ebdd29d93ef Mon Sep 17 00:00:00 2001
From: Beibei Chen <beibei.chen@utsouthwestern.edu>
Date: Thu, 29 Dec 2016 14:22:53 -0600
Subject: [PATCH] memechip script done

---
 workflow/scripts/runMemechip.py | 64 +++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 workflow/scripts/runMemechip.py

diff --git a/workflow/scripts/runMemechip.py b/workflow/scripts/runMemechip.py
new file mode 100644
index 0000000..d0f66f6
--- /dev/null
+++ b/workflow/scripts/runMemechip.py
@@ -0,0 +1,64 @@
+#!/qbrc/software/Python-2.7.7/bin/python
+# programmer : bbc
+# usage:
+
+import sys
+import re
+from re import sub
+import string
+import argparse as ap
+import logging
+#import twobitreader
+import pybedtools
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+logging.basicConfig(level=10)
+
+
+def prepare_argparser():
+  description = "Scan potential sgRNA in given fasta"
+  epilog = "For command line options of each command, type %(prog)% COMMAND -h"
+  argparser = ap.ArgumentParser(description=description, epilog = epilog)
+  argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BED file")
+  argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output")
+  argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file")
+  argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N")
+  argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks")
+  return(argparser)
+
+def rc():
+  comps = {'A':"T",'C':"G",'G':"C",'T':"A","N":"N"}
+  return ''.join([comps[x] for x in seq.upper()[::-1]])
+
+def main():
+  argparser = prepare_argparser()
+  args = argparser.parse_args()
+  run(args.infile, args.genome, args.limit, args.output)
+
+def run(infile, genome, limit, output)
+  infile = pybedtools.BedTool(infile)
+  #genome = twobitreader.TwoBitFile(genome)
+  output = open(args.outfile,"w")
+  rowcount = 1
+  for record in infile:
+    while rowcount <=limit:
+      rowcount += 1
+      try:
+        seq = genome[record.chrom][record.start:record.stop]
+      except:
+        pass
+      else:
+        if record.strand == "-":
+          seq = rc(seq)
+        newfa_name = record.name#"_".join(record.fields)
+        newfa = SeqRecord(Seq(seq),newfa_name,description="")
+        SeqIO.write(newfa,output+".fa","fasta")
+    output.close()
+  #Call memechip
+  meme_command = "meme-chip -oc "+output+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10"
+  p = subprocess.Popen(meme_command, shell=True)
+  p.communicate()
+ 
+if __name__=="__main__":
+  main()
-- 
GitLab