Skip to content
Snippets Groups Projects
Commit e2f0afca authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Update motif search scripts to meet new python standards

parent fa33cd9b
No related merge requests found
......@@ -57,7 +57,7 @@ process {
cpus = 32
}
withName: motifSearch {
module = ['python/2.7.x-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
cpus = 32
}
......
......@@ -455,11 +455,11 @@ process motifSearch {
output:
file "*meme*" into motifSearch
file "*meme_chip" into motifSearch
script:
"""
python2.7 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta
python3 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta
"""
}
#!/usr/bin/env python
# programmer : bbc
# usage:
#!/usr/bin/env python3
'''Call Motifs on called peaks.'''
import sys
import re
from re import sub
import string
import argparse as ap
import argparse
import logging
import subprocess
import pandas as pd
import utils
from multiprocessing import Pool
logging.basicConfig(level=10)
EPILOG = '''
For more details:
%(prog)s --help
'''
def prepare_argparser():
description = "Run memechip command"
epilog = "For command line options of each command, type %(prog)% COMMAND -h"
argparser = ap.ArgumentParser(description=description, epilog = epilog)
argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BED file")
# argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output")
argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file")
# argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N")
# argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks")
return(argparser)
# SETTINGS
def rc(seq):
comps = {'A':"T",'C':"G",'G':"C",'T':"A","N":"N"}
return ''.join([comps[x] for x in seq.upper()[::-1]])
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def main():
argparser = prepare_argparser()
args = argparser.parse_args()
#run(args.infile, args.genome, args.limit, args.output)
#get Pool ready
dfile = pd.read_csv(args.infile, sep='\t')
meme_arglist = zip(dfile['Peaks'].tolist(),[args.genome]*dfile.shape[0],dfile['Condition'].tolist())
work_pool = Pool(min(12,dfile.shape[0]))
resultList = work_pool.map(run_wrapper, meme_arglist)
work_pool.close()
work_pool.join()
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-d', '--design',
help="The design file to run motif search.",
required=True)
parser.add_argument('-g', '--genome',
help="The genome FASTA file.",
required=True)
args = parser.parse_args()
return args
# Functions
def run_wrapper(args):
run(*args)
def run(infile, genome, output):
# Get fasta file
fasta_command = "bedtools getfasta -fi "+genome+' -bed '+infile+' -fo '+output+'.fa'
p = subprocess.Popen(fasta_command, shell=True)
p.communicate()
#Call memechip
meme_command = "meme-chip -oc "+output+"_memechip"+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 "+output+".fa"
p = subprocess.Popen(meme_command, shell=True)
p.communicate()
motif_search(*args)
def motif_search(filename, genome, experiment):
sorted_fn = 'sorted-%s' % (filename)
out_fa = '%s.fa' % (experiment)
out_motif = '%s_memechip' % (experiment)
# Sort Bed file by
out, err = run_pipe([
'sort -k %dgr,%dgr %s' % (5, 5, filename),
sorted_fn)
# Get fasta file
out, err = utils.run_pipe([
'"bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)])
#Call memechip
out, err = utils.run_pipe([
'"meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -meme-norand' % (out_motif, out_fa)])
def main():
args = get_args()
design = args.design
genome = args.genome
# Read files
design_df = pd.read_csv(design, sep='\t')
meme_arglist = zip(design_df['Peaks'].tolist(),[genome]*design_df.shape[0],design_df['Experiment'].tolist())
work_pool = Pool(min(12,design_df.shape[0]))
return_list = work_pool.map(run_wrapper, meme_arglist)
work_pool.close()
work_pool.join()
if __name__=="__main__":
main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment