From 5c34de270201a597f199cdf865e940a47f53c823 Mon Sep 17 00:00:00 2001
From: Beibei Chen <beibei.chen@utsouthwestern.edu>
Date: Tue, 31 Jan 2017 12:15:06 -0600
Subject: [PATCH] next flow worked. All code added

---
 workflow/scripts/__init__.py      |   1 +
 workflow/scripts/process.py       |  77 ++++++++++++++++++++++++++++++
 workflow/scripts/runChipseeker.R  |  46 ++++++++++++++++++
 workflow/scripts/runDeepTools.pyc | Bin 0 -> 4234 bytes
 workflow/scripts/runMemechip.pyc  | Bin 0 -> 3260 bytes
 5 files changed, 124 insertions(+)
 create mode 100644 workflow/scripts/__init__.py
 create mode 100644 workflow/scripts/process.py
 create mode 100644 workflow/scripts/runChipseeker.R
 create mode 100644 workflow/scripts/runDeepTools.pyc
 create mode 100644 workflow/scripts/runMemechip.pyc

diff --git a/workflow/scripts/__init__.py b/workflow/scripts/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/workflow/scripts/__init__.py
@@ -0,0 +1 @@
+
diff --git a/workflow/scripts/process.py b/workflow/scripts/process.py
new file mode 100644
index 0000000..f01fd8d
--- /dev/null
+++ b/workflow/scripts/process.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+# programmer : bbc
+# usage: main function to call all the procedures for chip-seq analysis
+import sys
+import os
+import argparse as ap
+import logging
+import pandas as pd
+import glob
+import subprocess
+from multiprocessing import Pool
+import runDeepTools
+import runMemechip
+logging.basicConfig(level=10)
+
+
+def prepare_argparser():
+  description = "Make wig file for given bed using bam"
+  epilog = "For command line options of each command, type %(prog)% COMMAND -h"
+  argparser = ap.ArgumentParser(description=description, epilog = epilog)
+  argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input design file")
+  argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19")
+  argparser.add_argument("--top-peak",dest="toppeak",type=int, default=-1, help = "Only use top peaks for motif call")
+  #argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"])
+  #argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header")
+  return(argparser)
+
+def memechip_wrapper(args):
+  #print args  
+  runMemechip.run(*args)
+
+def main():
+  argparser = prepare_argparser()
+  args = argparser.parse_args()
+  #dfile = pd.read_csv(args.infile)
+
+  #for testing, add testing path to all input files
+  test_path = "/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/"
+  designfile = pd.read_csv(args.infile)
+  designfile['Peaks'] = designfile['Peaks'].apply(lambda x: test_path+x)
+  designfile['bamReads'] = designfile['bamReads'].apply(lambda x: test_path+x)
+  designfile['bamControl'] = designfile['bamControl'].apply(lambda x: test_path+x)
+  designfile.to_csv(args.infile+"_new",index=False)
+  dfile = pd.read_csv(args.infile+"_new")
+  #call deeptools
+  runDeepTools.run(args.infile+"_new", args.genome) 
+  #call diffbind
+  this_script = os.path.abspath(__file__).split("/")
+  folder = "/".join(this_script[0:len(this_script)-1])
+  
+  diffbind_command = "Rscript "+folder+"/runDiffBind.R "+args.infile+"_new"
+  #logging.debug(diffbind_command)
+  p = subprocess.Popen(diffbind_command, shell=True)
+  p.communicate()
+  #call chipseeker on original peaks and overlapping peaks
+  chipseeker_command = "Rscript "+folder+"/runChipseeker.R "+",".join(dfile['Peaks'].tolist())+" "+",".join(dfile['SampleID'])
+#BC##  logging.debug(chipseeker_command)
+  p = subprocess.Popen(chipseeker_command, shell=True)
+  p.communicate()
+  overlapping_peaks = glob.glob('*diffbind.xls')
+  overlapping_peak_names = []
+  for pn in overlapping_peaks:
+    overlapping_peak_names.append(pn.split("_diffbind")[0].replace("!","non"))
+  chipseeker_overlap_command = "Rscript "+folder+"/runChipseeker.R "+",".join(overlapping_peaks)+" "+",".join(overlapping_peak_names)
+  p = subprocess.Popen(chipseeker_overlap_command, shell=True)
+  p.communicate()
+  #MEME-chip on all peaks
+  meme_arglist =  zip(dfile['Peaks'].tolist(),[test_path+"hg19.2bit"]*dfile.shape[0],[str(args.toppeak)]*dfile.shape[0],dfile['SampleID'].tolist())
+#BC#  #print meme_arglist
+  work_pool = Pool(min(12,dfile.shape[0]))
+  resultList = work_pool.map(memechip_wrapper, meme_arglist)
+  work_pool.close()
+  work_pool.join()
+ 
+
+if __name__=="__main__":
+  main()
diff --git a/workflow/scripts/runChipseeker.R b/workflow/scripts/runChipseeker.R
new file mode 100644
index 0000000..03262de
--- /dev/null
+++ b/workflow/scripts/runChipseeker.R
@@ -0,0 +1,46 @@
+args = commandArgs(trailingOnly=TRUE)
+#if (length(args)==0) {
+#  stop("At least one argument must be supplied (input file).n", call.=FALSE)
+#} else if (length(args)==1) {
+#  # default output file
+#  args[3] = "out.txt"
+#}
+
+library(ChIPseeker)
+library(TxDb.Hsapiens.UCSC.hg19.knownGene)
+txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+#library(clusterProfiler)
+
+#files = list.files(".")
+files<-as.list(unlist(strsplit(args[1],",")))
+names(files)<-as.list(unlist(strsplit(args[2],",")))
+print(files)
+
+peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
+for(index in c(1:length(peakAnnoList)))
+{
+  filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="")
+  write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
+  #draw individual plot
+  pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") 
+  vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") 
+  upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
+  pdf(pie_name)
+  plotAnnoPie(peakAnnoList[[index]])
+  dev.off()
+  pdf(vennpie_name)
+  vennpie(peakAnnoList[[index]])
+  dev.off()
+  pdf(upsetplot_name)
+  upsetplot(peakAnnoList[[index]])
+  dev.off()
+
+
+}
+#promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
+#tagMatrixList <- lapply(files, getTagMatrix, windows=promoter)
+
+#plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), facet="row")
+
+#overlappeakfiles <- as.list(list.files("overlappeaks/"))
+
diff --git a/workflow/scripts/runDeepTools.pyc b/workflow/scripts/runDeepTools.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..90a7657d67359ee5b228fa1b5f01d1661b8421ca
GIT binary patch
literal 4234
zcmcInUvDHw5$~Q|+q-M~?BsGd$3@UObg&^=Z~g!hC`380bx9C5W^5^75si9h+OxAa
z|2*AmpHE(d<UHk*<bfCXLVO5#hhNpq`W)hyJb3NtuIa9>uBxv3bxrHv>%D*Kum2Q^
z=GQ|12_E}*G!g!#NQF4QrlmOc7UL~(+*;6Waok?eOX7H$X-}jpBI}6bj*zA$rd<(X
zL0eo2DWfIv6$JN0w9Le+h*p?b6H$kWbrE%$xF@0>6C1*8ifC0l_eAxLFdGmgWX?(+
z_Sc|r@?ZS+`x2yUqd6#w)GdKX&6!D)9w@eytX#)4!ejr624>FDdSWhx6Jjb!(@TuB
z#ihhH-rN(Ho|wY+CDItWLQCl9U{H@XgIdv=xboVv;e+~69DEz!;ll{*cn#c3#C_O*
z9V+bGCusNdbED3aSe+!PQ71*E;^fTaYHT7kvq>JSvCixcIG<rIEV4}JkxG-?sG@X9
zky}-qD5Jy4V)9+(E=r@G^vkM<-+rP-f7si5y1%Oi6Sqp18yi*$FLld!H)WC*vGqU>
z61zg(Aj!*_V~2v<F<X+8f;>jzT?dSnIh`eyiFkBk($e0;C@)q!PxsiQT5xQMe=vwm
zUSx&^HY9BrnG-!r>kTLI`@inf2|Ch9mySd}t>Rf`a(AFBYbr-4ucIi?%`nac$LcE9
z<EWaMJ`MOJz$eOe(SW(+<$b&eZtMZ5A>1@Ip&RaeK6=JYFe)lD9ETH=|6&+UlG2*f
zK<7HWu!$X>7uEBVv^XEuZnVQ{mhT!<9wGGXc6s4mgV3@v*vycI^`>NnCB21Q#^0N=
zFSpwvDMx2Q_Kxt_bwn~k6LC2e6pmL?Y#}hW<}KkQMdKys6AZqRa*kMSiz%g-p!8~1
zdAw@Av{-?OHihkXRv@aswa*InSzfHd<gKe-Oau}Psv%7f%2zG1`{j}NOH0gG#JnTs
zNcoPqgeb_~y6m9uqQ`@*sryxIgAA(80UX@A&FMeH7eJ#r%lEB<b25ZTvZJ%Z8FHYy
zP~!x#KNuiAC^D^rL?M)YgW5qcbIR(hOihrD?RH+iWLU6>($xM3EM;0aoO5MTO<AN6
z^Z&O1vcsa<Gj38u%9ch~NIeYN=Sg{Ns)9BrDMyJz<~}}LaNLxN)bx#x)To%{&OXM9
z^NDsx#R1l-iP0|8CGV4BCy;*h=X7V*2;QG%V^diLYm|9Wnku0j*p3ivj)Scc5G<6V
zor4Pd)!PMUA8eNZkij6;W0P8CH?IvpGnI~w8jO|w8#|0mc}5s?+g3qX)GKO%xqAOS
z{xzC5E~p(29IAQA+$7GCOuMr(O+xK(N`pa`<S?}awEc0W)5Kjsn7!X<_6)<ODyt-S
ztQ;Zu4FmXa!y<Q8kv4m?&h`=bUU?=sc_jupna6!XI);a70nsGY&c?|6&{)fnaZr>d
z_sKzwk_5D~oONb&d6|@b%2ej%cw+=?RI!X`wU>E0Vh%22tmd#R8M8{n)AcZc0S3;l
z2N?8ldNmG)u1Z<qTmvvk;gfQ~de;a>&H}0hr{P0T_J?SM+>#Hxp1kipkUbAH+Q;&z
z=tD|7f?B$N8N_}9qP|n<LdE?`h$L~*0y3t+AwnxkZCbSN(~<URt^WgRwy2?F@ZUtk
zJdW;&#+&|+XcW&~A~=CHVC94Hd2n7KR893RMl2~Q$l?}yIuY^{Tl_B&Fcf&mum`k`
zg#bLi_>hot_se$#;Fz!)Y$!99z}5yDE^*z#^~DvI0_^4KBk=|Ng_y(P*B1av3d~{>
z;C#6(rT}&jKz&b0ddD=>BC1pS7Wj)RgK!s^jJt2zBS5CAWt9&C^Bplo?Lr5!N|4<!
z0lRN!dIiKE!P4)0V<PR{u)gkxmabxYG25tcEFC#KJ!%jGDQ|%WocZK!LOxDE@Nhja
zK2Iob?2pkPB2cB6J?*OGg|eYe&7d+dmqY_!SvVU4wE7T8MH1`2Vc997i{G0(b`$kW
zp1%p}hwA6s<8#`)Ml^c^SxEI}7uqr7-M8bH9LH8;Pr4I6TR?mEJw)8RwQg=nh;AE+
z3}#fxGU8Bd<`L%y3XW8V#>W&z53Wn#9Q7&g41U+8|1+8-oC(vy8vg;PTeFUW+o!bm
z-=c=jjNy)q4{&abambD)DJ^5%371(^9t9<p;Upx?7CfA|JT|!=yX!nA4G709ZXun%
zhc`?12=5+Buyy&id?Ytz4@h>8@C;+fZ2~2Fu1m5{@p6r3l!bUj9mWa&kUCFr*Afp=
z3`zL<#vBIea{}Xxcoft7>`4SP6$X|)!WqDK8qXba-SYV^qaufW8Sw-KLJ-<B{|V&y
zzd}>vBk3B^rNk$7*IuPyf>(FYSwcYJZFp~aAya5*dW6RkX2Nk-Ktir%@r^|^-v{+g
zKy8g<>>avD_PO@=>HCOX!__mD(boSh#(XlhwvrCXr&^nDN#eL`!|C)go#ekmUt2D_
za!vBl1Od$iL7z1uAJ_}aGmB3EpMZ-~6`xx%>J!d9PU8}vblUpAA%+b!T<JE&2{B@*
zu1dz*CgG^Sr&!D`ssn+tnxmn1(I>>+K@$X2J3uGUcYbHmh96}`G)v9LbV!_OYj98f
Ww<$MU-PP{>ZoB(>x6@r-lm7#)c=x0L

literal 0
HcmV?d00001

diff --git a/workflow/scripts/runMemechip.pyc b/workflow/scripts/runMemechip.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d397028f858912677cfad02e2de1d34f683a954a
GIT binary patch
literal 3260
zcmcIm&2k$>5bjyYmgN7$NgT%zHiQ&Pfo#H$p(rXvY~xf8aZ+nnC8erFthFO~<^A){
zD6*@hli|WE@D3b#2p)w47ajn<?v<QW!PQ>xsHdl=r~8}!+0x(hGt1xp^0H0izY>1G
z$7}w=5a3@?MAW>X6*ZOc9f}mi4mBN8W4RJFOOriiYEDh)X=+YS=n6F}6S_*x>V&RQ
zvnF(jq8W;3sX1HdEmJf{@jNx>i}@5q3luL>bCHyurrr_-P+p;PQYxs@PcZfh1vMeA
zQZOULG6l0jTqAv*f;l>NDF2i6GPaRQ#LfNsJX{X{!QWn8fwYW;=^qx8DNOeD$&+Vq
z*Dx(6wPMZF?KBT8OGf&Qj=~vGf-GyA#k-By{D}dujxahjQe=sGz_dg|aLP1PFv=M@
zG<2xvPyiUkK{yAuU=)0^Wp)UDoI7PTmYb(wfq@7X|0B2bt{l(-E|3E#g+6Gy28q%{
z6A-!~5-TDB18$Oq4Uk)XSJN#D)<o_-k%P*c6ud8Tw<mJ@^$+0gp1Fq6>nCokW8FRq
zGq;__v7ZEH3k$nxKAF2wm}oc6Y?vm-O*^jk+eeedPhESKY4=_|%hT@td+zqLr%xa4
z?YNC2%jgE$wDVB5!dxPqg;Cly4#-AmD*V$3ldNw6pGaE~3zJS5Y2mSYTg76oPx@i5
z1K}R&C^HM-%4T=#haH!96pB;B@{LB?x2%~@0Vx{UhJsy)cE_S!ousjrvkTH(f#hT1
zx}Up;p?ybxY*<aB5&Py?tPz5b33SKrM^>QSPLnq}w{EU8?OWs8)ZHr%jmC~gVI0~b
zHkpz*H&>zNAk6?eSd2DyADcRRWEnO;v+VbyyxWg;VxRlD!6|dR5(F)O%q|ke=3?GK
z-Y>8`Ow&lt+w~)(MJ7yaom0W%6!F+E%c%oBnP{$KLNb_Vpf+&LJ>9k&TTixkWoT`u
zx!yQzAL-=F4GtrtPg;KBM`tE98-p}I?nLQeqX+}Dk@u6Q<9J)o&Mc#x0U<xvY<qDg
z2d3aaY8uZ?Rmbz8jFP$x16wMo`y+@D`G9nFXcH-meBHzgY7-prH#uLYXp^%W)rqJJ
zClx31IXK0Cg?=v4>dvbr&VD34GJf^7Lnlw@T+zOuzlS}z8W|tS_*lkm8TU{!#-&Q|
z6ySR);AM1yiQwMPGM(4iewiZSG9%khFBxY(GH-L<8%{J*_tgcIeDeb3Hz42!Pyti`
z0VR-WltWZL0ETjiQOaR&O6X}iFG~;Dm#Vv0=OpmfSPvIkxlX=UBPihRJ&t3K1K8uM
z_qe_b9^Nfb0vd~7G8zm)?1#x+{7Np9s;N2EW<F=qMQ|P9%7F-g033(}-~ucPAb1LB
z00&}D(F%g?9L+-_Z#2ahj4r@J(l!v0+~8hw)gJ3|2wm><cb@LN`i(}@bowg|W!tf$
zysAJWiob@kv)c^!5YbzqRsN~Uq(*l}6?&0W&}h|3LN9q~hJ&5Y`h!q{hiJpFp~7eJ
z>Ku)#WakM*q*xlFhZWu0Q6n^;Meg6j+J#qDIxi6`e=#gm?+QnRI??n3jS7yhp!2Pu
z3#|wZ6h`QTSE;v5L)bA#7F|rK5$u?y-gViGj4S&3o+$zA^^PwYFbFxlqz1~oQB<Tm
zU`noZd}Dnn`P{D>(y$t7+m&+Fn)EQ!fV|rfkvL2S?pKqEe>!j<OlC=(+OT8X2cNm3
z+@POC^&5Pa#O>K3`V5Mtk>;_bg9EOd92$0zq8a4c4^VYk_F#~1g?4w+uF^~=-YN^S
za}g>ur&s$ZPvhbw>*qzYGZrnCBhA<xUDx}VH*o|eeyly-?Qzz642x_X<e}Atx+q2Y
z=F{;lgo@nO#>m2RIbQtW?#m6mpM-7S>N*?$f7$1WSDco4I%uc;r0Br8d^mY`!Iy&)
zJ?Qu?HcbwasQ|)Ix5i%%a&g(^Ca-rNCh-ww9RsOFJj>`3b9ffjJl+{qQn&Gz9bT)c
zH9QN>Ep=VVcSNhjso!c9*UdPaGb61v*B+~!TuPX9Wy7Q|dEA;MAn*y(#j5uO7~Vr3
z9`F+D+6vR+#<&t<vW_oOxku#OTSfsc%ga~Np>M)Az85-SSBj^^g7*oQ#oMvj3_i>T
vw`j;hT}oH3;)w6!H0VeAA&0N|2m=GWBrr2o%$C4eD%ECd<=RwjZBG3Q;+C{f

literal 0
HcmV?d00001

-- 
GitLab