Skip to content
Snippets Groups Projects
Commit 5198436d authored by Venkat Malladi's avatar Venkat Malladi
Browse files

add in motif search.

parent 65bc6b76
Branches
Tags
No related merge requests found
...@@ -49,6 +49,8 @@ workflow_modules: ...@@ -49,6 +49,8 @@ workflow_modules:
- 'macs/2.1.0-20151222' - 'macs/2.1.0-20151222'
- 'UCSC_userApps/v317' - 'UCSC_userApps/v317'
- 'R/3.4.1-gccmkl' - 'R/3.4.1-gccmkl'
- 'meme/4.11.1-gcc-openmpi'
- 'python/2.7.x-anaconda'
# A list of parameters used by the workflow, defining how to present them, # A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter: # options etc in the web interface. For each parameter:
......
...@@ -55,6 +55,10 @@ process { ...@@ -55,6 +55,10 @@ process {
module = ['R/3.4.1-gccmkl'] module = ['R/3.4.1-gccmkl']
cpus = 32 cpus = 32
} }
$motifSearch {
module = ['python/2.7.x-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
cpus = 32
}
} }
...@@ -65,16 +69,19 @@ params { ...@@ -65,16 +69,19 @@ params {
bwa = '/project/shared/bicf_workflow_ref/GRCh38' bwa = '/project/shared/bicf_workflow_ref/GRCh38'
genomesize = 'hs' genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt' chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCh38/genome.fa'
} }
'GRCh37' { 'GRCh37' {
bwa = '/project/shared/bicf_workflow_ref/GRCh37' bwa = '/project/shared/bicf_workflow_ref/GRCh37'
genomesize = 'hs' genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt' chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCh37/genome.fa'
} }
'GRCm38' { 'GRCm38' {
bwa = '/project/shared/bicf_workflow_ref/GRCm38' bwa = '/project/shared/bicf_workflow_ref/GRCm38'
genomesize = 'mm' genomesize = 'mm'
chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt' chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCm38/genome.fa'
} }
} }
} }
......
...@@ -12,6 +12,7 @@ params.genomes = [] ...@@ -12,6 +12,7 @@ params.genomes = []
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.cutoffRatio = 1.2 params.cutoffRatio = 1.2
// Check inputs // Check inputs
...@@ -36,6 +37,7 @@ designFile = params.designFile ...@@ -36,6 +37,7 @@ designFile = params.designFile
genomeSize = params.genomeSize genomeSize = params.genomeSize
genome = params.genome genome = params.genome
chromSizes = params.chromSizes chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio cutoffRatio = params.cutoffRatio
process checkDesignFile { process checkDesignFile {
...@@ -371,6 +373,7 @@ process consensusPeaks { ...@@ -371,6 +373,7 @@ process consensusPeaks {
file '*.rejected.*' into rejectedPeaks file '*.rejected.*' into rejectedPeaks
file("design_diffPeaks.csv") into designDiffPeaks file("design_diffPeaks.csv") into designDiffPeaks
file("design_annotatePeaks.tsv") into designAnnotatePeaks file("design_annotatePeaks.tsv") into designAnnotatePeaks
file("design_annotatePeaks.tsv") into designMotifSearch
file("unqiue_experiments.csv") into uniqueExperiments file("unqiue_experiments.csv") into uniqueExperiments
script: script:
...@@ -441,3 +444,23 @@ process diffPeaks { ...@@ -441,3 +444,23 @@ process diffPeaks {
} }
} }
// Motif Search Peaks
process motifSearch {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file designMotifSearch
output:
file "*meme*" into motifSearch
script:
"""
python2.7 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta
"""
}
#!/qbrc/software/Python-2.7.7/bin/python #!/usr/bin/env python
# programmer : bbc # programmer : bbc
# usage: # usage:
...@@ -8,12 +8,8 @@ from re import sub ...@@ -8,12 +8,8 @@ from re import sub
import string import string
import argparse as ap import argparse as ap
import logging import logging
import twobitreader
import subprocess import subprocess
import pandas as pd import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from multiprocessing import Pool from multiprocessing import Pool
logging.basicConfig(level=10) logging.basicConfig(level=10)
...@@ -26,7 +22,7 @@ def prepare_argparser(): ...@@ -26,7 +22,7 @@ def prepare_argparser():
# argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output") # argparser.add_argument("-o","--output",dest = "outfile",type=str,required=True, help="output")
argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file") argparser.add_argument("-g","--genome",dest = "genome",type=str, help="Genome 2 bit file")
# argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N") # argparser.add_argument("-m","--mask",dest = "mask",type=bool,default=False, help="Convert repeats to N")
argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks") # argparser.add_argument("-l","--limit",dest = "limit",type=int,default=-1, help="Top limit of peaks")
return(argparser) return(argparser)
def rc(seq): def rc(seq):
...@@ -38,8 +34,8 @@ def main(): ...@@ -38,8 +34,8 @@ def main():
args = argparser.parse_args() args = argparser.parse_args()
#run(args.infile, args.genome, args.limit, args.output) #run(args.infile, args.genome, args.limit, args.output)
#get Pool ready #get Pool ready
dfile = pd.read_csv(args.infile) dfile = pd.read_csv(args.infile, sep='\t')
meme_arglist = zip(dfile['Peaks'].tolist(),[args.genome+"/genome.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist()) meme_arglist = zip(dfile['Peaks'].tolist(),[args.genome]*dfile.shape[0],dfile['Condition'].tolist())
work_pool = Pool(min(12,dfile.shape[0])) work_pool = Pool(min(12,dfile.shape[0]))
resultList = work_pool.map(run_wrapper, meme_arglist) resultList = work_pool.map(run_wrapper, meme_arglist)
work_pool.close() work_pool.close()
...@@ -49,43 +45,16 @@ def main(): ...@@ -49,43 +45,16 @@ def main():
def run_wrapper(args): def run_wrapper(args):
run(*args) run(*args)
def run(infile, genome, output):
# Get fasta file
fasta_command = "bedtools getfasta -fi "+genome+' -bed '+infile+' -fo '+output+'.fa'
p = subprocess.Popen(fasta_command, shell=True)
p.communicate()
def run(infile, genome, limit, output):
infile = open(infile).readlines()
logging.debug(len(infile))
genome = twobitreader.TwoBitFile(genome)
outfile = open(output+".fa","w")
rowcount = 0
limit = int(limit)
logging.debug(limit)
if limit < 0:
limit = len(infile)
for record in infile:
rowcount += 1
#logging.debug(record)
if rowcount <=limit:
seqbuf = record.rstrip().split("\t")
try:
#logging.debug(record.chrom)
seq = genome[seqbuf[0]][int(seqbuf[1]):int(seqbuf[2])]
except:
pass
else:
if len(seqbuf)>=5:
if seqbuf[5] == "-":
seq = rc(seq)
if len(seqbuf)>=4:
newfa_name = seqbuf[3]
else:
newfa_name = "_".join(seqbuf)
newfa = SeqRecord(Seq(seq),newfa_name,description="")
#logging.debug(seq)
SeqIO.write(newfa,outfile,"fasta")
outfile.close()
#Call memechip #Call memechip
meme_command = "meme-chip -oc "+output+"_memechip"+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 "+output+".fa" meme_command = "meme-chip -oc "+output+"_memechip"+" -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 "+output+".fa"
p = subprocess.Popen(meme_command, shell=True) p = subprocess.Popen(meme_command, shell=True)
p.communicate() p.communicate()
if __name__=="__main__": if __name__=="__main__":
main() main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment