Skip to content
Snippets Groups Projects
Commit e2f0afca authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Update motif search scripts to meet new python standards

parent fa33cd9b
No related merge requests found
...@@ -57,7 +57,7 @@ process { ...@@ -57,7 +57,7 @@ process {
cpus = 32 cpus = 32
} }
withName: motifSearch { withName: motifSearch {
module = ['python/2.7.x-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0'] module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
cpus = 32 cpus = 32
} }
......
...@@ -455,11 +455,11 @@ process motifSearch { ...@@ -455,11 +455,11 @@ process motifSearch {
output: output:
file "*meme*" into motifSearch file "*meme_chip" into motifSearch
script: script:
""" """
python2.7 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta python3 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta
""" """
} }
#!/usr/bin/env python #!/usr/bin/env python3
# programmer : bbc
# usage: '''Call Motifs on called peaks.'''
import sys import sys
import re import re
from re import sub from re import sub
import string import string
import argparse as ap import argparse
import logging import logging
import subprocess import subprocess
import pandas as pd import pandas as pd
import utils
from multiprocessing import Pool from multiprocessing import Pool
logging.basicConfig(level=10)
EPILOG = '''
For more details:
%(prog)s --help
'''
def prepare_argparser(): # SETTINGS
description = "Run memechip command"
epilog = "For command line options of each command, type %(prog)% COMMAND -h"
argparser = ap.ArgumentParser(description=description, epilog = epilog)
argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BED file")
# argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output")
argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file")
# argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N")
# argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks")
return(argparser)
def rc(seq): logger = logging.getLogger(__name__)
comps = {'A':"T",'C':"G",'G':"C",'T':"A","N":"N"} logger.addHandler(logging.NullHandler())
return ''.join([comps[x] for x in seq.upper()[::-1]]) logger.propagate = False
logger.setLevel(logging.INFO)
def main():
argparser = prepare_argparser()
args = argparser.parse_args()
#run(args.infile, args.genome, args.limit, args.output)
#get Pool ready
dfile = pd.read_csv(args.infile, sep='\t')
meme_arglist = zip(dfile['Peaks'].tolist(),[args.genome]*dfile.shape[0],dfile['Condition'].tolist())
work_pool = Pool(min(12,dfile.shape[0]))
resultList = work_pool.map(run_wrapper, meme_arglist)
work_pool.close()
work_pool.join()
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-d', '--design',
help="The design file to run motif search.",
required=True)
parser.add_argument('-g', '--genome',
help="The genome FASTA file.",
required=True)
args = parser.parse_args()
return args
# Functions
def run_wrapper(args): def run_wrapper(args):
run(*args) motif_search(*args)
def run(infile, genome, output): def motif_search(filename, genome, experiment):
# Get fasta file sorted_fn = 'sorted-%s' % (filename)
fasta_command = "bedtools getfasta -fi "+genome+' -bed '+infile+' -fo '+output+'.fa' out_fa = '%s.fa' % (experiment)
p = subprocess.Popen(fasta_command, shell=True) out_motif = '%s_memechip' % (experiment)
p.communicate()
# Sort Bed file by
#Call memechip out, err = run_pipe([
meme_command = "meme-chip -oc "+output+"_memechip"+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 "+output+".fa" 'sort -k %dgr,%dgr %s' % (5, 5, filename),
p = subprocess.Popen(meme_command, shell=True) sorted_fn)
p.communicate()
# Get fasta file
out, err = utils.run_pipe([
'"bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)])
#Call memechip
out, err = utils.run_pipe([
'"meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -meme-norand' % (out_motif, out_fa)])
def main():
args = get_args()
design = args.design
genome = args.genome
# Read files
design_df = pd.read_csv(design, sep='\t')
meme_arglist = zip(design_df['Peaks'].tolist(),[genome]*design_df.shape[0],design_df['Experiment'].tolist())
work_pool = Pool(min(12,design_df.shape[0]))
return_list = work_pool.map(run_wrapper, meme_arglist)
work_pool.close()
work_pool.join()
if __name__=="__main__": if __name__=="__main__":
main() main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment