Skip to content
Snippets Groups Projects
Commit 813fb41d authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Fix motif search and limit number of peaks.

parent 6de1721c
No related merge requests found
......@@ -14,6 +14,17 @@ params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.cutoffRatio = 1.2
params.topPeakCount = 600
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio
topPeakCount = params.topPeakCount
// Check inputs
if( params.bwaIndex ){
......@@ -31,15 +42,8 @@ readsList = Channel
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio
// Check design file for errors
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
......@@ -458,6 +462,6 @@ process motifSearch {
script:
"""
python3 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta
python3 $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
"""
}
......@@ -2,7 +2,7 @@
'''Call Motifs on called peaks.'''
import os
import sys
import re
from re import sub
......@@ -42,6 +42,10 @@ def get_args():
help="The genome FASTA file.",
required=True)
parser.add_argument('-p', '--peak',
help="The number of peaks to use.",
required=True)
args = parser.parse_args()
return args
......@@ -50,33 +54,39 @@ def get_args():
def run_wrapper(args):
motif_search(*args)
def motif_search(filename, genome, experiment):
sorted_fn = 'sorted-%s' % (filename)
def motif_search(filename, genome, experiment, peak):
file_basename = os.path.basename(filename)
sorted_fn = 'sorted-%s' % (file_basename)
out_fa = '%s.fa' % (experiment)
out_motif = '%s_memechip' % (experiment)
# Sort Bed file by
out, err = run_pipe([
# Sort Bed file and limit number of peaks
if peak == -1:
peak = utils.count_lines(filename)
out, err = utils.run_pipe([
'sort -k %dgr,%dgr %s' % (5, 5, filename),
sorted_fn)
'head -n %s' % (peak)], outfile=sorted_fn)
# Get fasta file
out, err = utils.run_pipe([
'"bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)])
'bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)])
#Call memechip
out, err = utils.run_pipe([
'"meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -meme-norand' % (out_motif, out_fa)])
'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
def main():
args = get_args()
design = args.design
genome = args.genome
peak = args.peak
# Read files
design_df = pd.read_csv(design, sep='\t')
meme_arglist = zip(design_df['Peaks'].tolist(),[genome]*design_df.shape[0],design_df['Experiment'].tolist())
meme_arglist = zip(design_df['Peaks'].tolist(), [genome]*design_df.shape[0], design_df['Condition'].tolist(), [peak]*design_df.shape[0])
work_pool = Pool(min(12,design_df.shape[0]))
return_list = work_pool.map(run_wrapper, meme_arglist)
work_pool.close()
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment