From a858f1c3ce15f0cbaa70591ba9933aef8029b81c Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Tue, 8 May 2018 13:42:50 -0500 Subject: [PATCH] First start at adding annotations. --- workflow/main.nf | 20 +++++++++++ workflow/scripts/overlap_peaks.py | 5 ++- workflow/scripts/runChipseeker.R | 57 ++++++++++++++----------------- 3 files changed, 50 insertions(+), 32 deletions(-) diff --git a/workflow/main.nf b/workflow/main.nf index 31ab992..aaa3a26 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -34,6 +34,7 @@ readsList = Channel pairedEnd = params.pairedEnd designFile = params.designFile genomeSize = params.genomeSize +genome = params.genome chromSizes = params.chromSizes cutoffRatio = params.cutoffRatio @@ -377,3 +378,22 @@ process consensusPeaks { """ } + +// Annotate Peaks +process peakAnnotation { + + publishDir "$baseDir/output/${task.process}", mode: 'copy' + + input: + + file consensusPeaks + + output: + file "*chipseeker*" into chipseeker_originalpeak_output + script: + """ + module load python/2.7.x-anaconda + module load R/3.3.2-gccmkl + Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath} +""" +} diff --git a/workflow/scripts/overlap_peaks.py b/workflow/scripts/overlap_peaks.py index 379f884..82f63b5 100644 --- a/workflow/scripts/overlap_peaks.py +++ b/workflow/scripts/overlap_peaks.py @@ -182,9 +182,12 @@ def main(): design_peaks_df = pd.read_csv(design, sep='\t') design_files_df = pd.read_csv(files, sep='\t') - # Make a design file for + # Make a design file for differential binding design_diff = update_design(design_files_df) + # Make a design file for annotating Peaks + design_anno = pd.DataFrame() + # Find consenus overlap peaks for each experiment for experiment, df_experiment in design_peaks_df.groupby('experiment_id'): replicated_peak = overlap(experiment, df_experiment) diff --git a/workflow/scripts/runChipseeker.R b/workflow/scripts/runChipseeker.R index b20df78..8f8b0d0 100644 --- a/workflow/scripts/runChipseeker.R +++ b/workflow/scripts/runChipseeker.R @@ -1,36 +1,32 @@ -args = commandArgs(trailingOnly=TRUE) -#if (length(args)==0) { -# stop("At least one argument must be supplied (input file).n", call.=FALSE) -#} else if (length(args)==1) { -# # default output file -# args[3] = "out.txt" -#} +#!/bin/Rscript library(ChIPseeker) -#Parse the genome path and get genome version -path_elements = unlist(strsplit(args[2],"[/]")) -genome = path_elements[length(path_elements)] - -if(genome=="GRCh37") -{ -library(TxDb.Hsapiens.UCSC.hg19.knownGene) -txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene -} -if(genome=="GRCm38") -{ -library(TxDb.Mmusculus.UCSC.mm10.knownGene) -txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene + + +# Create parser object +parser <- ArgumentParser() + +# Specify our desired options +parser$add_argument("-d", "--design", help = "File path to design file", required = TRUE) +parser$add_argument("-g", "--genome", help = "The genome assembly", required = TRUE) + +# Parse arguments +args <- parser$parse_args() + + +# Load UCSC Known Genes +if(args$genome=='GRCh37') { + library(TxDb.Hsapiens.UCSC.hg19.knownGene) + txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene +} else if(args$genome=='GRCm38') { + library(TxDb.Mmusculus.UCSC.mm10.knownGene) + txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene } -if(genome=="GRCh38") -{ -library(TxDb.Hsapiens.UCSC.hg38.knownGene) -txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene +else if(args$genome=='GRCh38') { + library(TxDb.Hsapiens.UCSC.hg38.knownGene) + txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene } -design<-read.csv(args[1]) -files<-as.list(as.character(design$Peaks)) -names(files)<-design$SampleID - peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE) for(index in c(1:length(peakAnnoList))) @@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList))) filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="") write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F) #draw individual plot - pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") - vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") + pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") + vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="") pdf(pie_name) plotAnnoPie(peakAnnoList[[index]]) @@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList))) } - -- GitLab