From b5c9ff2e57c26389f1b20a7d7c1db124d14a41b4 Mon Sep 17 00:00:00 2001
From: s181385 <spencer.barnes@utsouthwestern.edu>
Date: Mon, 3 Aug 2020 17:30:45 -0500
Subject: [PATCH] fix peak annotation r script

Two changes to the per-sample annotation loop:

* Keep only the part of geneId before the first "." before merging
  with `sym` on geneId/ensembl. Presumably this strips an Ensembl
  version suffix so the IDs can match the `ensembl` key; `sym` is
  defined outside this hunk, so confirm against its contents.

* Move "geneId" to the front of column_names. merge() returns the
  `by` column first, followed by the remaining columns of x and then
  of y, so the names assigned with colnames() must follow that order.

---
Review notes (this section is ignored by `git am`):

* NOTE(review): leading whitespace of the context lines and the exact
  position of the blank context lines were reconstructed. If the patch
  does not apply cleanly, retry with `git apply --ignore-whitespace`.
* Follow-up suggestions, intentionally not part of this patch:
  - sub("\\..*$", "", as.character(df$geneId)) is vectorised and does
    not fail on an empty-string ID, unlike sapply(strsplit(...), "[[", 1).
  - Prefer TRUE/FALSE over T/F, seq_along(peakAnnoList) over
    c(1:length(peakAnnoList)), and paste0() over paste(..., sep="").
  - colnames(df_final) <- column_names renames purely by position;
    renaming by name would not depend on merge()'s column order.

 workflow/scripts/annotate_peaks.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R
index 853f7aa..40aabbf 100644
--- a/workflow/scripts/annotate_peaks.R
+++ b/workflow/scripts/annotate_peaks.R
@@ -43,13 +43,14 @@ names(files) <- design$Condition
 peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
 peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
 
-column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
+column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
                   "pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
-                  "geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS", "symbol")
+                  "geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")
 
 for(index in c(1:length(peakAnnoList))) {
   filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
   df <- as.data.frame(peakAnnoList[[index]])
+  df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
   df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
   colnames(df_final) <- column_names
   write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
-- 
GitLab