From a858f1c3ce15f0cbaa70591ba9933aef8029b81c Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Tue, 8 May 2018 13:42:50 -0500
Subject: [PATCH] First start at adding annotations.

---
 workflow/main.nf                  | 20 +++++++++++
 workflow/scripts/overlap_peaks.py |  5 ++-
 workflow/scripts/runChipseeker.R  | 57 ++++++++++++++-----------------
 3 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/workflow/main.nf b/workflow/main.nf
index 31ab992..aaa3a26 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -34,6 +34,7 @@ readsList = Channel
 pairedEnd = params.pairedEnd
 designFile = params.designFile
 genomeSize = params.genomeSize
+genome = params.genome
 chromSizes = params.chromSizes
 cutoffRatio = params.cutoffRatio
 
@@ -377,3 +378,22 @@ process consensusPeaks {
   """
 
 }
+
+// Annotate Peaks
+process peakAnnotation {
+
+  publishDir "$baseDir/output/${task.process}", mode: 'copy'
+
+  input:
+
+  file consensusPeaks
+
+   output:
+     file "*chipseeker*" into chipseeker_originalpeak_output
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
+"""
+}
diff --git a/workflow/scripts/overlap_peaks.py b/workflow/scripts/overlap_peaks.py
index 379f884..82f63b5 100644
--- a/workflow/scripts/overlap_peaks.py
+++ b/workflow/scripts/overlap_peaks.py
@@ -182,9 +182,12 @@ def main():
     design_peaks_df = pd.read_csv(design, sep='\t')
     design_files_df = pd.read_csv(files, sep='\t')
 
-    # Make a design file for
+    # Make a design file for differential binding
     design_diff = update_design(design_files_df)
 
+    # Make a design file for annotating Peaks
+    design_anno = pd.DataFrame()
+
     # Find consenus overlap peaks for each experiment
     for experiment, df_experiment in design_peaks_df.groupby('experiment_id'):
         replicated_peak = overlap(experiment, df_experiment)
diff --git a/workflow/scripts/runChipseeker.R b/workflow/scripts/runChipseeker.R
index b20df78..8f8b0d0 100644
--- a/workflow/scripts/runChipseeker.R
+++ b/workflow/scripts/runChipseeker.R
@@ -1,36 +1,32 @@
-args = commandArgs(trailingOnly=TRUE)
-#if (length(args)==0) {
-#  stop("At least one argument must be supplied (input file).n", call.=FALSE)
-#} else if (length(args)==1) {
-#  # default output file
-#  args[3] = "out.txt"
-#}
+#!/bin/Rscript
 
 library(ChIPseeker)
-#Parse the genome path and get genome version
-path_elements = unlist(strsplit(args[2],"[/]"))
-genome = path_elements[length(path_elements)]
-
-if(genome=="GRCh37")
-{ 
-library(TxDb.Hsapiens.UCSC.hg19.knownGene)
-txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
-}
-if(genome=="GRCm38")
-{ 
-library(TxDb.Mmusculus.UCSC.mm10.knownGene)
-txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
+
+
+# Create parser object
+parser <- ArgumentParser()
+
+# Specify our desired options
+parser$add_argument("-d", "--design", help = "File path to design file", required = TRUE)
+parser$add_argument("-g", "--genome", help = "The genome assembly", required = TRUE)
+
+# Parse arguments
+args <- parser$parse_args()
+
+
+# Load UCSC Known Genes
+if(args$genome=='GRCh37') {
+    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
+    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+} else if(args$genome=='GRCm38')  {
+    library(TxDb.Mmusculus.UCSC.mm10.knownGene)
+    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
 }
-if(genome=="GRCh38")
-{ 
-library(TxDb.Hsapiens.UCSC.hg38.knownGene)
-txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
+else if(args$genome=='GRCh38')  {
+    library(TxDb.Hsapiens.UCSC.hg38.knownGene)
+    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
 }
 
-design<-read.csv(args[1])
-files<-as.list(as.character(design$Peaks))
-names(files)<-design$SampleID
-
 
 peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
 for(index in c(1:length(peakAnnoList)))
@@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList)))
   filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="")
   write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
   #draw individual plot
-  pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") 
-  vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") 
+  pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="")
+  vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="")
   upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
   pdf(pie_name)
   plotAnnoPie(peakAnnoList[[index]])
@@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList)))
 
 
 }
-
-- 
GitLab