First start at adding annotations.

a858f1c3 · Venkat Malladi · 55c0d935 · a858f1c3 · a858f1c3 · a858f1c3
Commit a858f1c3 authored 6 years ago by Venkat Malladi
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -34,6 +34,7 @@ readsList = Channel
 pairedEnd = params.pairedEnd
 designFile = params.designFile
 genomeSize = params.genomeSize
+genome = params.genome
 chromSizes = params.chromSizes
 cutoffRatio = params.cutoffRatio
@@ -377,3 +378,22 @@ process consensusPeaks {
  """
 }
+// Annotate Peaks
+process peakAnnotation {
+  publishDir "$baseDir/output/${task.process}", mode: 'copy'
+  input:
+  file consensusPeaks
+   output:
+     file "*chipseeker*" into chipseeker_originalpeak_output
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
+"""
+}
--- a/workflow/scripts/overlap_peaks.py
+++ b/workflow/scripts/overlap_peaks.py
@@ -182,9 +182,12 @@ def main():
    design_peaks_df = pd.read_csv(design, sep='\t')
    design_files_df = pd.read_csv(files, sep='\t')
-    # Make a design file for
+    # Make a design file for differential binding
    design_diff = update_design(design_files_df)
+    # Make a design file for annotating Peaks
+    design_anno = pd.DataFrame()
    # Find consenus overlap peaks for each experiment
    for experiment, df_experiment in design_peaks_df.groupby('experiment_id'):
        replicated_peak = overlap(experiment, df_experiment)

--- a/workflow/scripts/runChipseeker.R
+++ b/workflow/scripts/runChipseeker.R
-args = commandArgs(trailingOnly=TRUE)
+#!/bin/Rscript
-#if (length(args)==0) {
-#  stop("At least one argument must be supplied (input file).n", call.=FALSE)
-#} else if (length(args)==1) {
-#  # default output file
-#  args[3] = "out.txt"
-#}
 library(ChIPseeker)
-#Parse the genome path and get genome version
-path_elements = unlist(strsplit(args[2],"[/]"))
-genome = path_elements[length(path_elements)]
+# Create parser object
+parser <- ArgumentParser()
-if(genome=="GRCh37")
-{ 
+# Specify our desired options
-library(TxDb.Hsapiens.UCSC.hg19.knownGene)
+parser$add_argument("-d", "--design", help = "File path to design file", required = TRUE)
-txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+parser$add_argument("-g", "--genome", help = "The genome assembly", required = TRUE)
-}
-if(genome=="GRCm38")
+# Parse arguments
-{ 
+args <- parser$parse_args()
-library(TxDb.Mmusculus.UCSC.mm10.knownGene)
-txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
+# Load UCSC Known Genes
+if(args$genome=='GRCh37') {
+    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
+    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+} else if(args$genome=='GRCm38')  {
+    library(TxDb.Mmusculus.UCSC.mm10.knownGene)
+    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
 }
-if(genome=="GRCh38")
+else if(args$genome=='GRCh38')  {
-{ 
+    library(TxDb.Hsapiens.UCSC.hg38.knownGene)
-library(TxDb.Hsapiens.UCSC.hg38.knownGene)
+    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
-txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
 }
-design<-read.csv(args[1])
-files<-as.list(as.character(design$Peaks))
-names(files)<-design$SampleID
 peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
 for(index in c(1:length(peakAnnoList)))
@@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList)))
  filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="")
  write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
  #draw individual plot
-  pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") 
+  pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="")
-  vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") 
+  vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="")
  upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
  pdf(pie_name)
  plotAnnoPie(peakAnnoList[[index]])
@@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList)))
 }