Skip to content
Snippets Groups Projects
Commit 5c34de27 authored by Beibei Chen's avatar Beibei Chen
Browse files

next flow worked. All code added

parent f0b36739
No related merge requests found
#!/usr/bin/python
# programmer : bbc
# usage: main function to call all the procedures for chip-seq analysis
import sys
import os
import argparse as ap
import logging
import pandas as pd
import glob
import subprocess
from multiprocessing import Pool
import runDeepTools
import runMemechip
logging.basicConfig(level=10)
def prepare_argparser():
description = "Make wig file for given bed using bam"
epilog = "For command line options of each command, type %(prog)% COMMAND -h"
argparser = ap.ArgumentParser(description=description, epilog = epilog)
argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input design file")
argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19")
argparser.add_argument("--top-peak",dest="toppeak",type=int, default=-1, help = "Only use top peaks for motif call")
#argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"])
#argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header")
return(argparser)
def memechip_wrapper(args):
#print args
runMemechip.run(*args)
def main():
argparser = prepare_argparser()
args = argparser.parse_args()
#dfile = pd.read_csv(args.infile)
#for testing, add testing path to all input files
test_path = "/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/"
designfile = pd.read_csv(args.infile)
designfile['Peaks'] = designfile['Peaks'].apply(lambda x: test_path+x)
designfile['bamReads'] = designfile['bamReads'].apply(lambda x: test_path+x)
designfile['bamControl'] = designfile['bamControl'].apply(lambda x: test_path+x)
designfile.to_csv(args.infile+"_new",index=False)
dfile = pd.read_csv(args.infile+"_new")
#call deeptools
runDeepTools.run(args.infile+"_new", args.genome)
#call diffbind
this_script = os.path.abspath(__file__).split("/")
folder = "/".join(this_script[0:len(this_script)-1])
diffbind_command = "Rscript "+folder+"/runDiffBind.R "+args.infile+"_new"
#logging.debug(diffbind_command)
p = subprocess.Popen(diffbind_command, shell=True)
p.communicate()
#call chipseeker on original peaks and overlapping peaks
chipseeker_command = "Rscript "+folder+"/runChipseeker.R "+",".join(dfile['Peaks'].tolist())+" "+",".join(dfile['SampleID'])
#BC## logging.debug(chipseeker_command)
p = subprocess.Popen(chipseeker_command, shell=True)
p.communicate()
overlapping_peaks = glob.glob('*diffbind.xls')
overlapping_peak_names = []
for pn in overlapping_peaks:
overlapping_peak_names.append(pn.split("_diffbind")[0].replace("!","non"))
chipseeker_overlap_command = "Rscript "+folder+"/runChipseeker.R "+",".join(overlapping_peaks)+" "+",".join(overlapping_peak_names)
p = subprocess.Popen(chipseeker_overlap_command, shell=True)
p.communicate()
#MEME-chip on all peaks
meme_arglist = zip(dfile['Peaks'].tolist(),[test_path+"hg19.2bit"]*dfile.shape[0],[str(args.toppeak)]*dfile.shape[0],dfile['SampleID'].tolist())
#BC# #print meme_arglist
work_pool = Pool(min(12,dfile.shape[0]))
resultList = work_pool.map(memechip_wrapper, meme_arglist)
work_pool.close()
work_pool.join()
if __name__=="__main__":
main()
args = commandArgs(trailingOnly=TRUE)
#if (length(args)==0) {
# stop("At least one argument must be supplied (input file).n", call.=FALSE)
#} else if (length(args)==1) {
# # default output file
# args[3] = "out.txt"
#}
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#library(clusterProfiler)
#files = list.files(".")
files<-as.list(unlist(strsplit(args[1],",")))
names(files)<-as.list(unlist(strsplit(args[2],",")))
print(files)
peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
for(index in c(1:length(peakAnnoList)))
{
filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="")
write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
#draw individual plot
pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="")
vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="")
upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
pdf(pie_name)
plotAnnoPie(peakAnnoList[[index]])
dev.off()
pdf(vennpie_name)
vennpie(peakAnnoList[[index]])
dev.off()
pdf(upsetplot_name)
upsetplot(peakAnnoList[[index]])
dev.off()
}
#promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
#tagMatrixList <- lapply(files, getTagMatrix, windows=promoter)
#plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), facet="row")
#overlappeakfiles <- as.list(list.files("overlappeaks/"))
File added
File added
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment