From 5c34de270201a597f199cdf865e940a47f53c823 Mon Sep 17 00:00:00 2001 From: Beibei Chen <beibei.chen@utsouthwestern.edu> Date: Tue, 31 Jan 2017 12:15:06 -0600 Subject: [PATCH] next flow worked. All code added --- workflow/scripts/__init__.py | 1 + workflow/scripts/process.py | 77 ++++++++++++++++++++++++++++++ workflow/scripts/runChipseeker.R | 46 ++++++++++++++++++ workflow/scripts/runDeepTools.pyc | Bin 0 -> 4234 bytes workflow/scripts/runMemechip.pyc | Bin 0 -> 3260 bytes 5 files changed, 124 insertions(+) create mode 100644 workflow/scripts/__init__.py create mode 100644 workflow/scripts/process.py create mode 100644 workflow/scripts/runChipseeker.R create mode 100644 workflow/scripts/runDeepTools.pyc create mode 100644 workflow/scripts/runMemechip.pyc diff --git a/workflow/scripts/__init__.py b/workflow/scripts/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/workflow/scripts/__init__.py @@ -0,0 +1 @@ + diff --git a/workflow/scripts/process.py b/workflow/scripts/process.py new file mode 100644 index 0000000..f01fd8d --- /dev/null +++ b/workflow/scripts/process.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# programmer : bbc +# usage: main function to call all the procedures for chip-seq analysis +import sys +import os +import argparse as ap +import logging +import pandas as pd +import glob +import subprocess +from multiprocessing import Pool +import runDeepTools +import runMemechip +logging.basicConfig(level=10) + + +def prepare_argparser(): + description = "Make wig file for given bed using bam" + epilog = "For command line options of each command, type %(prog)% COMMAND -h" + argparser = ap.ArgumentParser(description=description, epilog = epilog) + argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input design file") + argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19") + argparser.add_argument("--top-peak",dest="toppeak",type=int, default=-1, help = "Only use top peaks for motif call") + #argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"]) + #argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header") + return(argparser) + +def memechip_wrapper(args): + #print args + runMemechip.run(*args) + +def main(): + argparser = prepare_argparser() + args = argparser.parse_args() + #dfile = pd.read_csv(args.infile) + + #for testing, add testing path to all input files + test_path = "/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/" + designfile = pd.read_csv(args.infile) + designfile['Peaks'] = designfile['Peaks'].apply(lambda x: test_path+x) + designfile['bamReads'] = designfile['bamReads'].apply(lambda x: test_path+x) + designfile['bamControl'] = designfile['bamControl'].apply(lambda x: test_path+x) + designfile.to_csv(args.infile+"_new",index=False) + dfile = pd.read_csv(args.infile+"_new") + #call deeptools + runDeepTools.run(args.infile+"_new", args.genome) + #call diffbind + this_script = os.path.abspath(__file__).split("/") + folder = "/".join(this_script[0:len(this_script)-1]) + + diffbind_command = "Rscript "+folder+"/runDiffBind.R "+args.infile+"_new" + #logging.debug(diffbind_command) + p = subprocess.Popen(diffbind_command, shell=True) + p.communicate() + #call chipseeker on original peaks and overlapping peaks + chipseeker_command = "Rscript "+folder+"/runChipseeker.R "+",".join(dfile['Peaks'].tolist())+" "+",".join(dfile['SampleID']) +#BC## logging.debug(chipseeker_command) + p = subprocess.Popen(chipseeker_command, shell=True) + p.communicate() + overlapping_peaks = glob.glob('*diffbind.xls') + overlapping_peak_names = [] + for pn in overlapping_peaks: + overlapping_peak_names.append(pn.split("_diffbind")[0].replace("!","non")) + chipseeker_overlap_command = "Rscript "+folder+"/runChipseeker.R "+",".join(overlapping_peaks)+" "+",".join(overlapping_peak_names) + p = subprocess.Popen(chipseeker_overlap_command, shell=True) + p.communicate() + #MEME-chip on all peaks + meme_arglist = zip(dfile['Peaks'].tolist(),[test_path+"hg19.2bit"]*dfile.shape[0],[str(args.toppeak)]*dfile.shape[0],dfile['SampleID'].tolist()) +#BC# #print meme_arglist + work_pool = Pool(min(12,dfile.shape[0])) + resultList = work_pool.map(memechip_wrapper, meme_arglist) + work_pool.close() + work_pool.join() + + +if __name__=="__main__": + main() diff --git a/workflow/scripts/runChipseeker.R b/workflow/scripts/runChipseeker.R new file mode 100644 index 0000000..03262de --- /dev/null +++ b/workflow/scripts/runChipseeker.R @@ -0,0 +1,46 @@ +args = commandArgs(trailingOnly=TRUE) +#if (length(args)==0) { +# stop("At least one argument must be supplied (input file).n", call.=FALSE) +#} else if (length(args)==1) { +# # default output file +# args[3] = "out.txt" +#} + +library(ChIPseeker) +library(TxDb.Hsapiens.UCSC.hg19.knownGene) +txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene +#library(clusterProfiler) + +#files = list.files(".") +files<-as.list(unlist(strsplit(args[1],","))) +names(files)<-as.list(unlist(strsplit(args[2],","))) +print(files) + +peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE) +for(index in c(1:length(peakAnnoList))) +{ + filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="") + write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F) + #draw individual plot + pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") + vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") + upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="") + pdf(pie_name) + plotAnnoPie(peakAnnoList[[index]]) + dev.off() + pdf(vennpie_name) + vennpie(peakAnnoList[[index]]) + dev.off() + pdf(upsetplot_name) + upsetplot(peakAnnoList[[index]]) + dev.off() + + +} +#promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000) +#tagMatrixList <- lapply(files, getTagMatrix, windows=promoter) + +#plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), facet="row") + +#overlappeakfiles <- as.list(list.files("overlappeaks/")) + diff --git a/workflow/scripts/runDeepTools.pyc b/workflow/scripts/runDeepTools.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90a7657d67359ee5b228fa1b5f01d1661b8421ca GIT binary patch literal 4234 zcmcInUvDHw5$~Q|+q-M~?BsGd$3@UObg&^=Z~g!hC`380bx9C5W^5^75si9h+OxAa z|2*AmpHE(d<UHk*<bfCXLVO5#hhNpq`W)hyJb3NtuIa9>uBxv3bxrHv>%D*Kum2Q^ z=GQ|12_E}*G!g!#NQF4QrlmOc7UL~(+*;6Waok?eOX7H$X-}jpBI}6bj*zA$rd<(X zL0eo2DWfIv6$JN0w9Le+h*p?b6H$kWbrE%$xF@0>6C1*8ifC0l_eAxLFdGmgWX?(+ z_Sc|r@?ZS+`x2yUqd6#w)GdKX&6!D)9w@eytX#)4!ejr624>FDdSWhx6Jjb!(@TuB z#ihhH-rN(Ho|wY+CDItWLQCl9U{H@XgIdv=xboVv;e+~69DEz!;ll{*cn#c3#C_O* z9V+bGCusNdbED3aSe+!PQ71*E;^fTaYHT7kvq>JSvCixcIG<rIEV4}JkxG-?sG@X9 zky}-qD5Jy4V)9+(E=r@G^vkM<-+rP-f7si5y1%Oi6Sqp18yi*$FLld!H)WC*vGqU> z61zg(Aj!*_V~2v<F<X+8f;>jzT?dSnIh`eyiFkBk($e0;C@)q!PxsiQT5xQMe=vwm zUSx&^HY9BrnG-!r>kTLI`@inf2|Ch9mySd}t>Rf`a(AFBYbr-4ucIi?%`nac$LcE9 z<EWaMJ`MOJz$eOe(SW(+<$b&eZtMZ5A>1@Ip&RaeK6=JYFe)lD9ETH=|6&+UlG2*f zK<7HWu!$X>7uEBVv^XEuZnVQ{mhT!<9wGGXc6s4mgV3@v*vycI^`>NnCB21Q#^0N= zFSpwvDMx2Q_Kxt_bwn~k6LC2e6pmL?Y#}hW<}KkQMdKys6AZqRa*kMSiz%g-p!8~1 zdAw@Av{-?OHihkXRv@aswa*InSzfHd<gKe-Oau}Psv%7f%2zG1`{j}NOH0gG#JnTs zNcoPqgeb_~y6m9uqQ`@*sryxIgAA(80UX@A&FMeH7eJ#r%lEB<b25ZTvZJ%Z8FHYy zP~!x#KNuiAC^D^rL?M)YgW5qcbIR(hOihrD?RH+iWLU6>($xM3EM;0aoO5MTO<AN6 z^Z&O1vcsa<Gj38u%9ch~NIeYN=Sg{Ns)9BrDMyJz<~}}LaNLxN)bx#x)To%{&OXM9 z^NDsx#R1l-iP0|8CGV4BCy;*h=X7V*2;QG%V^diLYm|9Wnku0j*p3ivj)Scc5G<6V zor4Pd)!PMUA8eNZkij6;W0P8CH?IvpGnI~w8jO|w8#|0mc}5s?+g3qX)GKO%xqAOS z{xzC5E~p(29IAQA+$7GCOuMr(O+xK(N`pa`<S?}awEc0W)5Kjsn7!X<_6)<ODyt-S ztQ;Zu4FmXa!y<Q8kv4m?&h`=bUU?=sc_jupna6!XI);a70nsGY&c?|6&{)fnaZr>d z_sKzwk_5D~oONb&d6|@b%2ej%cw+=?RI!X`wU>E0Vh%22tmd#R8M8{n)AcZc0S3;l z2N?8ldNmG)u1Z<qTmvvk;gfQ~de;a>&H}0hr{P0T_J?SM+>#Hxp1kipkUbAH+Q;&z z=tD|7f?B$N8N_}9qP|n<LdE?`h$L~*0y3t+AwnxkZCbSN(~<URt^WgRwy2?F@ZUtk zJdW;&#+&|+XcW&~A~=CHVC94Hd2n7KR893RMl2~Q$l?}yIuY^{Tl_B&Fcf&mum`k` zg#bLi_>hot_se$#;Fz!)Y$!99z}5yDE^*z#^~DvI0_^4KBk=|Ng_y(P*B1av3d~{> z;C#6(rT}&jKz&b0ddD=>BC1pS7Wj)RgK!s^jJt2zBS5CAWt9&C^Bplo?Lr5!N|4<! z0lRN!dIiKE!P4)0V<PR{u)gkxmabxYG25tcEFC#KJ!%jGDQ|%WocZK!LOxDE@Nhja zK2Iob?2pkPB2cB6J?*OGg|eYe&7d+dmqY_!SvVU4wE7T8MH1`2Vc997i{G0(b`$kW zp1%p}hwA6s<8#`)Ml^c^SxEI}7uqr7-M8bH9LH8;Pr4I6TR?mEJw)8RwQg=nh;AE+ z3}#fxGU8Bd<`L%y3XW8V#>W&z53Wn#9Q7&g41U+8|1+8-oC(vy8vg;PTeFUW+o!bm z-=c=jjNy)q4{&abambD)DJ^5%371(^9t9<p;Upx?7CfA|JT|!=yX!nA4G709ZXun% zhc`?12=5+Buyy&id?Ytz4@h>8@C;+fZ2~2Fu1m5{@p6r3l!bUj9mWa&kUCFr*Afp= z3`zL<#vBIea{}Xxcoft7>`4SP6$X|)!WqDK8qXba-SYV^qaufW8Sw-KLJ-<B{|V&y zzd}>vBk3B^rNk$7*IuPyf>(FYSwcYJZFp~aAya5*dW6RkX2Nk-Ktir%@r^|^-v{+g zKy8g<>>avD_PO@=>HCOX!__mD(boSh#(XlhwvrCXr&^nDN#eL`!|C)go#ekmUt2D_ za!vBl1Od$iL7z1uAJ_}aGmB3EpMZ-~6`xx%>J!d9PU8}vblUpAA%+b!T<JE&2{B@* zu1dz*CgG^Sr&!D`ssn+tnxmn1(I>>+K@$X2J3uGUcYbHmh96}`G)v9LbV!_OYj98f Ww<$MU-PP{>ZoB(>x6@r-lm7#)c=x0L literal 0 HcmV?d00001 diff --git a/workflow/scripts/runMemechip.pyc b/workflow/scripts/runMemechip.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d397028f858912677cfad02e2de1d34f683a954a GIT binary patch literal 3260 zcmcIm&2k$>5bjyYmgN7$NgT%zHiQ&Pfo#H$p(rXvY~xf8aZ+nnC8erFthFO~<^A){ zD6*@hli|WE@D3b#2p)w47ajn<?v<QW!PQ>xsHdl=r~8}!+0x(hGt1xp^0H0izY>1G z$7}w=5a3@?MAW>X6*ZOc9f}mi4mBN8W4RJFOOriiYEDh)X=+YS=n6F}6S_*x>V&RQ zvnF(jq8W;3sX1HdEmJf{@jNx>i}@5q3luL>bCHyurrr_-P+p;PQYxs@PcZfh1vMeA zQZOULG6l0jTqAv*f;l>NDF2i6GPaRQ#LfNsJX{X{!QWn8fwYW;=^qx8DNOeD$&+Vq z*Dx(6wPMZF?KBT8OGf&Qj=~vGf-GyA#k-By{D}dujxahjQe=sGz_dg|aLP1PFv=M@ zG<2xvPyiUkK{yAuU=)0^Wp)UDoI7PTmYb(wfq@7X|0B2bt{l(-E|3E#g+6Gy28q%{ z6A-!~5-TDB18$Oq4Uk)XSJN#D)<o_-k%P*c6ud8Tw<mJ@^$+0gp1Fq6>nCokW8FRq zGq;__v7ZEH3k$nxKAF2wm}oc6Y?vm-O*^jk+eeedPhESKY4=_|%hT@td+zqLr%xa4 z?YNC2%jgE$wDVB5!dxPqg;Cly4#-AmD*V$3ldNw6pGaE~3zJS5Y2mSYTg76oPx@i5 z1K}R&C^HM-%4T=#haH!96pB;B@{LB?x2%~@0Vx{UhJsy)cE_S!ousjrvkTH(f#hT1 zx}Up;p?ybxY*<aB5&Py?tPz5b33SKrM^>QSPLnq}w{EU8?OWs8)ZHr%jmC~gVI0~b zHkpz*H&>zNAk6?eSd2DyADcRRWEnO;v+VbyyxWg;VxRlD!6|dR5(F)O%q|ke=3?GK z-Y>8`Ow<+w~)(MJ7yaom0W%6!F+E%c%oBnP{$KLNb_Vpf+&LJ>9k&TTixkWoT`u zx!yQzAL-=F4GtrtPg;KBM`tE98-p}I?nLQeqX+}Dk@u6Q<9J)o&Mc#x0U<xvY<qDg z2d3aaY8uZ?Rmbz8jFP$x16wMo`y+@D`G9nFXcH-meBHzgY7-prH#uLYXp^%W)rqJJ zClx31IXK0Cg?=v4>dvbr&VD34GJf^7Lnlw@T+zOuzlS}z8W|tS_*lkm8TU{!#-&Q| z6ySR);AM1yiQwMPGM(4iewiZSG9%khFBxY(GH-L<8%{J*_tgcIeDeb3Hz42!Pyti` z0VR-WltWZL0ETjiQOaR&O6X}iFG~;Dm#Vv0=OpmfSPvIkxlX=UBPihRJ&t3K1K8uM z_qe_b9^Nfb0vd~7G8zm)?1#x+{7Np9s;N2EW<F=qMQ|P9%7F-g033(}-~ucPAb1LB z00&}D(F%g?9L+-_Z#2ahj4r@J(l!v0+~8hw)gJ3|2wm><cb@LN`i(}@bowg|W!tf$ zysAJWiob@kv)c^!5YbzqRsN~Uq(*l}6?&0W&}h|3LN9q~hJ&5Y`h!q{hiJpFp~7eJ z>Ku)#WakM*q*xlFhZWu0Q6n^;Meg6j+J#qDIxi6`e=#gm?+QnRI??n3jS7yhp!2Pu z3#|wZ6h`QTSE;v5L)bA#7F|rK5$u?y-gViGj4S&3o+$zA^^PwYFbFxlqz1~oQB<Tm zU`noZd}Dnn`P{D>(y$t7+m&+Fn)EQ!fV|rfkvL2S?pKqEe>!j<OlC=(+OT8X2cNm3 z+@POC^&5Pa#O>K3`V5Mtk>;_bg9EOd92$0zq8a4c4^VYk_F#~1g?4w+uF^~=-YN^S za}g>ur&s$ZPvhbw>*qzYGZrnCBhA<xUDx}VH*o|eeyly-?Qzz642x_X<e}Atx+q2Y z=F{;lgo@nO#>m2RIbQtW?#m6mpM-7S>N*?$f7$1WSDco4I%uc;r0Br8d^mY`!Iy&) zJ?Qu?HcbwasQ|)Ix5i%%a&g(^Ca-rNCh-ww9RsOFJj>`3b9ffjJl+{qQn&Gz9bT)c zH9QN>Ep=VVcSNhjso!c9*UdPaGb61v*B+~!TuPX9Wy7Q|dEA;MAn*y(#j5uO7~Vr3 z9`F+D+6vR+#<&t<vW_oOxku#OTSfsc%ga~Np>M)Az85-SSBj^^g7*oQ#oMvj3_i>T vw`j;hT}oH3;)w6!H0VeAA&0N|2m=GWBrr2o%$C4eD%ECd<=RwjZBG3Q;+C{f literal 0 HcmV?d00001 -- GitLab