update

cba11424 · Brandi Cantarel · aa2351e8 · aa2351e8 · cba11424 · aa2351e8
Commit cba11424 authored 6 years ago by Brandi Cantarel
--- a/diff_exp/DEA_htseq.R
+++ b/diff_exp/DEA_htseq.R
-#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
-library(edgeR)
-library(DESeq2)
-library("RColorBrewer")
-library("gplots")
-library(qusage)
-rowMax <- function(x) apply(x,1,max)
-col.grp <- function(n,b) {
-  colorvec <- vector(mode="character", length=length(n))
-  plotcols <- rainbow(length(b))
-  for (i in 1:length(n)) {
-    for (j in 1:length(b)) {
-      if ( n[i] == b[j] ) {
-        colorvec[i] = plotcols[j]
-      }
-    }
-  }
-  c(colorvec)
-}
-#################### Read in Data ################################
-genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
-tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
-tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
-ct <- tbl[,4:length(tbl)]
-row.names(ct) <- tbl$ENSEMBL
-samples<- names(ct)
-dtbl <- read.table('design.txt',header=TRUE,sep="\t")
-samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
-grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
-samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
-colData <- samtbl[c('SampleGroup','SubjectID')]
-row.names(colData) <- samtbl$samples
-dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
-dds <- dds[ rowMax(counts(dds)) > 30, ]
-dds <- dds[ colSums(counts(dds)) > 1000000]
-if (nrow(counts(dds)) < 1) {
-print(paste("Samples are filtered if there is < 1M reads.  There are less than no remaining sample(s) after this filter",sep=' '))
-q()
-}
-countTable <- counts(dds)
-#grps <- as.character(levels(colData(dds)$SampleGroup))
-grps <- as.character(colData(dds)$SampleGroup)
-col.blocks <-col.grp(grps,levels(factor(grps)))
-libSizes <- as.vector(colSums(countTable))
-logcpm <- tbl2[,4:length(tbl2)]
-row.names(logcpm) <- tbl2$SYMBOL
-tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
-row.names(tmp.tab) <- tmp.tab$Group.1
-mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
-#################### Run DESEQ2 ################################
-dds <- DESeq(dds)
-rld <- rlogTransformation(dds, blind=TRUE)
-sampleDists <- dist(t(assay(rld)))
-png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
-heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-dev.off()
-#Compare Samples using PCA
-png(file="pca.png",bg ="transparent",height=768,width=1024)
-print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
-dev.off()
-#Do all pairwise comparisons
-contrast <- resultsNames(dds)
-cond<- levels(colData(dds)$SampleGroup)
-a <- length(cond)-1
-for (i in 1:a) {
-  for (j in 2:length(cond)) {
-    if (i == j) {
-      next
-    } else {
-      res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
-      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-      output$rawP <- output$pvalue
-      output$logFC <- output$log2FoldChange
-      output$fdr <- output$padj
-      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-      write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-      filt.out <- na.omit(output[output$fdr < 0.05,])
-      if (nrow(filt.out) > 2) {
-      	 subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
-      	 gnames <- filt.out[c('ensembl','symbol')]
-      	 s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-      	 STREE <- hclust(dist(t(subset)))
-      	 zscores <- scale(t(subset))
-      	 ngenes <- length(colnames(zscores))
-      	 textscale <- (1/(ngenes/30))
-      	 if (textscale > 1) {
-            textscale <-1
-      	 }
-      	 if (textscale < 0.1) {
-            textscale <- 0.1
-      	 }
-      	 png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
-      	 heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-      	 legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-      	 dev.off()
-      }
-    }
-  }
-}
-###################### Run EdgeR ########################
-###################### Run QuSage ######################
-MSIG.geneSets <- read.gmt('geneset.gmt')
-design <- model.matrix(~grps)
-d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
-d <- calcNormFactors(d)
-d <- estimateCommonDisp(d)
-png(file="mds.png",bg ="transparent",height=768,width=1024)
-plotMDS(d, labels=grps,col=col.blocks)
-legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
-dev.off()
-cond <-levels(d$samples$group)
-colnames(design) <- levels(d$samples$group)
-a <- length(cond)-1
-for (i in 1:a) {
-  for (j in 2:length(cond)) {
-    if (i == j) {
-      next
-    } else {
-      c <- exactTest(d, pair=c(cond[j],cond[i]))
-      res <- c$table
-      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-      output$rawP <- output$PValue
-      output$logFC <- output$logFC
-      output$fdr <- p.adjust(output$rawP, method ='fdr')
-      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-      write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-      filt.out <- na.omit(output[output$fdr < 0.05,])
-      if (nrow(filt.out) > 2) {
-      subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
-      gnames <- filt.out[c('ensembl','symbol')]
-      s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-      STREE <- hclust(dist(t(subset)))
-      zscores <- scale(t(subset))
-      ngenes <- length(colnames(zscores))
-      textscale <- (1/(ngenes/30))
-      if (textscale > 1) {
-        textscale <-1
-      }
-      if (textscale < 0.1) {
-        textscale <- 0.1
-      }
-      png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
-      heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-      legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-      dev.off()
-      gcont <- paste(cond[j],cond[i],sep='-')
-      qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
-      save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
-      }
-      }
-  }
-}
-###################### Run Limma VOOM ########################
-# design <- model.matrix(~0+grps)
-# colnames(design) <- grpnames
-# a <- length(cond)-1
-# design.pairs <- c()
-# k <- 0
-# for (i in 1:a) {
-#   for (j in 2:length(cond)) {
-#     if (i == j) {
-#       next
-#     } else {
-#     k <- k+1
-#     design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
-#     }
-#     }
-#     }
-#contrast.matrix <- makeContrasts(design.pairs,levels=design)
-#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
-#d <- calcNormFactors(d)
-#d <- estimateCommonDisp(d)
-# v <- voom(d,design,plot=TRUE)
-# fit <- lmFit(v,design)
-# fit2 <- contrasts.fit(fit, contrast.matrix)
-# fit2 <- eBayes(fit2)
-# comps <- colnames(fit2$coefficients)
-# for (i in 1:length(comps)) {
-#   res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
-#   res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-#   output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-#   output$rawP <- output$P.Value
-#   output$logFC <- output$adj.P.Val
-#   output$fdr <- p.adjust(output$rawP, method ='fdr')
-#   output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-#   write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-#   filt.out <- na.omit(output[output$fdr < 0.05,])
-#   if (nrow(filt.out) > 2) {
-#   subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
-#   gnames <- filt.out[c('ensembl','symbol')]
-#   s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-#   STREE <- hclust(dist(t(subset)))
-#   zscores <- scale(t(subset))
-#   ngenes <- length(colnames(zscores))
-#   textscale <- (1/(ngenes/30))
-#   if (textscale > 1) {
-#      textscale <-1
-#   }
-#   if (textscale < 0.1) {
-#     textscale <- 0.1
-#   }
-#   png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
-#   heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-#   legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-#   dev.off()
-#   }
-# }
--- a/diff_exp/DEA_htseq.R
+++ b/diff_exp/DEA_htseq.R
+../genect_rnaseq/DEA_htseq.R
\ No newline at end of file
--- a/diff_exp/build_ballgown.R
+++ b/diff_exp/build_ballgown.R
-library(ballgown)
-args<-commandArgs(TRUE)
-sample_dirs <- args
-samples <- gsub('_stringtie','',sample_dirs)
-design <- "design.txt"
-samtbl <- read.table(file=design,header=TRUE,sep='\t')
-bg <- ballgown(samples=sample_dirs, meas='all')
-samples <- gsub('_stringtie','',sampleNames(bg))
-mergetbl <- merge(as.data.frame(samples),samtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
-pData(bg) = data.frame(id=samples, group=as.character(mergetbl$SampleGroup))
-save(bg,file="bg.rda")
--- a/diff_exp/build_ballgown.R
+++ b/diff_exp/build_ballgown.R
+../genect_rnaseq/build_ballgown.R
\ No newline at end of file
--- a/diff_exp/concat_edgeR.pl
+++ b/diff_exp/concat_edgeR.pl
-#!/usr/bin/perl -w
-#concat_edgeR.pl
-#symbol	chrom	start	end	ensembl	type	logFC	logCPM	PValue	monocytes	neutrophils	rawP	fdr	bonf
-my @files = @ARGV;
-open OUT, ">edgeR.results.txt" or die $!;
-print OUT join("\t",'G1','G2','ENSEMBL','SYMBOL','TYPE','logFC','logCPM',
-	       'pval','fdr','G1.Mean','G2.Mean'),"\n";
-foreach $f (@files) {
-    open IN, "<$f" or die $!;
-    my ($fname,$ext) = split(/\.edgeR\./,$f);
-    $fname =~ s/edgeR\///;
-    my ($g1,$g2) = split(/_/,$fname);
-    my $head = <IN>;
-    chomp($head);
-    my @colnames = split(/\t/,$head);
-    while (my $line = <IN>) {
-	chomp($line);
-	my @row = split(/\t/,$line);
-	my %hash;
-	foreach my $i (0..$#row) {
-	    $hash{$colnames[$i]} = $row[$i];
-	}
-	print OUT join("\t",$g1,$g2,$hash{ensembl},$hash{symbol},$hash{type},
-		      sprintf("%.2f",$hash{logFC}),
-		       sprintf("%.2f",$hash{logCPM}),
-		       sprintf("%.1e",$hash{rawP}),sprintf("%.1e",$hash{fdr}),
-		       sprintf("%.0f",$hash{$g1}),sprintf("%.0f",$hash{$g2})),"\n";
-    }
-}
--- a/diff_exp/concat_edgeR.pl
+++ b/diff_exp/concat_edgeR.pl
+../genect_rnaseq/concat_edgeR.pl
\ No newline at end of file
--- a/diff_exp/dea.R
+++ b/diff_exp/dea.R
-#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
-library(edgeR)
-library(DESeq2)
-library("RColorBrewer")
-library("gplots")
-library(qusage)
-rowMax <- function(x) apply(x,1,max)
-col.grp <- function(n,b) {
-  colorvec <- vector(mode="character", length=length(n))
-  plotcols <- rainbow(length(b))
-  for (i in 1:length(n)) {
-    for (j in 1:length(b)) {
-      if ( n[i] == b[j] ) {
-        colorvec[i] = plotcols[j]
-      }
-    }
-  }
-  c(colorvec)
-}
-#################### Read in Data ################################
-genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
-tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
-tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
-ct <- tbl[,4:length(tbl)]
-row.names(ct) <- tbl$ENSEMBL
-samples<- names(ct)
-dtbl <- read.table('design.txt',header=TRUE,sep="\t")
-samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
-grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
-samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
-colData <- samtbl[c('SampleGroup','SubjectID')]
-row.names(colData) <- samtbl$samples
-dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
-dds <- dds[ rowMax(counts(dds)) > 30, ]
-dds <- dds[ colSums(counts(dds)) > 1000000]
-if (nrow(counts(dds)) < 1) {
-print(paste("Samples are filtered if there is < 1M reads.  There are less than no remaining sample(s) after this filter",sep=' '))
-q()
-}
-countTable <- counts(dds)
-#grps <- as.character(levels(colData(dds)$SampleGroup))
-grps <- as.character(colData(dds)$SampleGroup)
-col.blocks <-col.grp(grps,levels(factor(grps)))
-libSizes <- as.vector(colSums(countTable))
-logcpm <- tbl2[,4:length(tbl2)]
-row.names(logcpm) <- tbl2$SYMBOL
-tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
-row.names(tmp.tab) <- tmp.tab$Group.1
-mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
-#################### Run DESEQ2 ################################
-dds <- DESeq(dds)
-rld <- rlogTransformation(dds, blind=TRUE)
-sampleDists <- dist(t(assay(rld)))
-png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
-heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-dev.off()
-#Compare Samples using PCA
-png(file="pca.png",bg ="transparent",height=768,width=1024)
-print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
-dev.off()
-#Do all pairwise comparisons
-contrast <- resultsNames(dds)
-cond<- levels(colData(dds)$SampleGroup)
-a <- length(cond)-1
-for (i in 1:a) {
-  for (j in 2:length(cond)) {
-    if (i == j) {
-      next
-    } else {
-      res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
-      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-      output$rawP <- output$pvalue
-      output$logFC <- output$log2FoldChange
-      output$fdr <- output$padj
-      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-      write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-      filt.out <- na.omit(output[output$fdr < 0.05,])
-      if (nrow(filt.out) > 2) {
-      	 subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
-      	 gnames <- filt.out[c('ensembl','symbol')]
-      	 s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-      	 STREE <- hclust(dist(t(subset)))
-      	 zscores <- scale(t(subset))
-      	 ngenes <- length(colnames(zscores))
-      	 textscale <- (1/(ngenes/30))
-      	 if (textscale > 1) {
-            textscale <-1
-      	 }
-      	 if (textscale < 0.1) {
-            textscale <- 0.1
-      	 }
-      	 png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
-      	 heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-      	 legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-      	 dev.off()
-      }
-    }
-  }
-}
-###################### Run EdgeR ########################
-###################### Run QuSage ######################
-MSIG.geneSets <- read.gmt('geneset.gmt')
-design <- model.matrix(~grps)
-d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
-d <- calcNormFactors(d)
-d <- estimateCommonDisp(d)
-png(file="mds.png",bg ="transparent",height=768,width=1024)
-plotMDS(d, labels=grps,col=col.blocks)
-legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
-dev.off()
-cond <-levels(d$samples$group)
-colnames(design) <- levels(d$samples$group)
-a <- length(cond)-1
-for (i in 1:a) {
-  for (j in 2:length(cond)) {
-    if (i == j) {
-      next
-    } else {
-      c <- exactTest(d, pair=c(cond[j],cond[i]))
-      res <- c$table
-      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-      output$rawP <- output$PValue
-      output$logFC <- output$logFC
-      output$fdr <- p.adjust(output$rawP, method ='fdr')
-      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-      write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-      filt.out <- na.omit(output[output$fdr < 0.05,])
-      if (nrow(filt.out) > 2) {
-      subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
-      gnames <- filt.out[c('ensembl','symbol')]
-      s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-      STREE <- hclust(dist(t(subset)))
-      zscores <- scale(t(subset))
-      ngenes <- length(colnames(zscores))
-      textscale <- (1/(ngenes/30))
-      if (textscale > 1) {
-        textscale <-1
-      }
-      if (textscale < 0.1) {
-        textscale <- 0.1
-      }
-      png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
-      heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-      legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-      dev.off()
-      gcont <- paste(cond[j],cond[i],sep='-')
-      qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
-      save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
-      }
-      }
-  }
-}
-###################### Run Limma VOOM ########################
-# design <- model.matrix(~0+grps)
-# colnames(design) <- grpnames
-# a <- length(cond)-1
-# design.pairs <- c()
-# k <- 0
-# for (i in 1:a) {
-#   for (j in 2:length(cond)) {
-#     if (i == j) {
-#       next
-#     } else {
-#     k <- k+1
-#     design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
-#     }
-#     }
-#     }
-#contrast.matrix <- makeContrasts(design.pairs,levels=design)
-#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
-#d <- calcNormFactors(d)
-#d <- estimateCommonDisp(d)
-# v <- voom(d,design,plot=TRUE)
-# fit <- lmFit(v,design)
-# fit2 <- contrasts.fit(fit, contrast.matrix)
-# fit2 <- eBayes(fit2)
-# comps <- colnames(fit2$coefficients)
-# for (i in 1:length(comps)) {
-#   res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
-#   res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-#   output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-#   output$rawP <- output$P.Value
-#   output$logFC <- output$adj.P.Val
-#   output$fdr <- p.adjust(output$rawP, method ='fdr')
-#   output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-#   write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-#   filt.out <- na.omit(output[output$fdr < 0.05,])
-#   if (nrow(filt.out) > 2) {
-#   subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
-#   gnames <- filt.out[c('ensembl','symbol')]
-#   s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-#   STREE <- hclust(dist(t(subset)))
-#   zscores <- scale(t(subset))
-#   ngenes <- length(colnames(zscores))
-#   textscale <- (1/(ngenes/30))
-#   if (textscale > 1) {
-#      textscale <-1
-#   }
-#   if (textscale < 0.1) {
-#     textscale <- 0.1
-#   }
-#   png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
-#   heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-#   legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-#   dev.off()
-#   }
-# }
--- a/diff_exp/dea.R
+++ b/diff_exp/dea.R
+../genect_rnaseq/dea.R
\ No newline at end of file
--- a/diff_exp/gencode_genename.pl
+++ b/diff_exp/gencode_genename.pl
-#!/usr/bin/perl -w
-#parse_gencode.pl
-open OUT, ">genenames.txt" or die $!;
-print OUT join("\t",'chrom','start','end','ensembl','symbol','type'),"\n";
-my $gtf = shift @ARGV;
-open GCODE, "<$gtf" or die $!;
-while (my $line = <GCODE>) {
-    chomp($line);
-    next if ($line =~ m/^#/);
-    my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) = 
-	split(/\t/,$line);
-    next unless ($feature eq 'gene');
-    $info =~ s/\"//g;
-    my %hash;
-    foreach $a (split(/;\s*/,$info)) {
-	my ($key,$val) = split(/ /,$a);
-	$hash{$key} = $val;
-    }
-    $hash{gene_id} =~ s/\.\d+//;
-    print OUT join("\t",$chrom,$start,$end,$hash{gene_id},$hash{gene_name},$hash{gene_type}),"\n";
-}
--- a/diff_exp/gencode_genename.pl
+++ b/diff_exp/gencode_genename.pl
+../genect_rnaseq/gencode_genename.pl
\ No newline at end of file
--- a/diff_exp/geneabundance.sh
+++ b/diff_exp/geneabundance.sh
-#!/bin/bash
-#rnaseqalign.sh
-usage() {
-  echo "-h Help documentation for hisat.sh"
-  echo "-b  --BAM File"
-  echo "-g  --GTF File"
-  echo "-s  --stranded"
-  echo "-p  --Prefix for output file name"
-  echo "Example: bash geneabudance.sh genect_rnaseq/geneabundance.sh -s stranded -g gtf_file -p pair_id -b bam"
-  exit 1
-}
-OPTIND=1 # Reset OPTIND
-while getopts :b:g:p:s:h opt
-do
-    case $opt in
-        b) sbam=$OPTARG;;
-        g) gtf=$OPTARG;;
-        p) pair_id=$OPTARG;;
-        s) stranded=$OPTARG;;
-        h) usage;;
-    esac
-done
-shift $(($OPTIND -1))
-# Check for mandatory options
-if [[ -z $pair_id ]] || [[ -z $sbam ]]
-then
-    usage
-fi
-if [[ -z $SLURM_CPUS_ON_NODE ]]
-then
-    SLURM_CPUS_ON_NODE=1
-fi
-if [[ $SLURM_CPUS_ON_NODE > 64 ]]
-then
-    SLURM_CPUS_ON_NODE=64
-fi
-source /etc/profile.d/modules.sh
-module load subread/1.6.1 stringtie/1.3.2d-intel
-featureCounts -s $stranded -M --fraction -J --ignoreDup -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
-mkdir ${pair_id}_stringtie
-cd ${pair_id}_stringtie
-stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
--- a/diff_exp/geneabundance.sh
+++ b/diff_exp/geneabundance.sh
+../genect_rnaseq/geneabundance.sh
\ No newline at end of file
--- a/genect_rnaseq/DEA_htseq.R
+++ b/genect_rnaseq/DEA_htseq.R
+#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
+library(edgeR)
+library(DESeq2)
+library("RColorBrewer")
+library("gplots")
+library(qusage)
+rowMax <- function(x) apply(x,1,max)
+col.grp <- function(n,b) {
+  colorvec <- vector(mode="character", length=length(n))
+  plotcols <- rainbow(length(b))
+  for (i in 1:length(n)) {
+    for (j in 1:length(b)) {
+      if ( n[i] == b[j] ) {
+        colorvec[i] = plotcols[j]
+      }
+    }
+  }
+  c(colorvec)
+}
+#################### Read in Data ################################
+genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
+tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
+tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
+ct <- tbl[,4:length(tbl)]
+row.names(ct) <- tbl$ENSEMBL
+samples<- names(ct)
+dtbl <- read.table('design.txt',header=TRUE,sep="\t")
+samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
+grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
+samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
+colData <- samtbl[c('SampleGroup','SubjectID')]
+row.names(colData) <- samtbl$samples
+dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
+dds <- dds[ rowMax(counts(dds)) > 30, ]
+dds <- dds[ colSums(counts(dds)) > 1000000]
+if (nrow(counts(dds)) < 1) {
+print(paste("Samples are filtered if there is < 1M reads.  There are less than no remaining sample(s) after this filter",sep=' '))
+q()
+}
+countTable <- counts(dds)
+#grps <- as.character(levels(colData(dds)$SampleGroup))
+grps <- as.character(colData(dds)$SampleGroup)
+col.blocks <-col.grp(grps,levels(factor(grps)))
+libSizes <- as.vector(colSums(countTable))
+logcpm <- tbl2[,4:length(tbl2)]
+row.names(logcpm) <- tbl2$SYMBOL
+tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
+row.names(tmp.tab) <- tmp.tab$Group.1
+mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
+#################### Run DESEQ2 ################################
+dds <- DESeq(dds)
+rld <- rlogTransformation(dds, blind=TRUE)
+sampleDists <- dist(t(assay(rld)))
+png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
+heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+dev.off()
+#Compare Samples using PCA
+png(file="pca.png",bg ="transparent",height=768,width=1024)
+print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
+dev.off()
+#Do all pairwise comparisons
+contrast <- resultsNames(dds)
+cond<- levels(colData(dds)$SampleGroup)
+a <- length(cond)-1
+for (i in 1:a) {
+  for (j in 2:length(cond)) {
+    if (i == j) {
+      next
+    } else {
+      res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
+      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
+      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
+      output$rawP <- output$pvalue
+      output$logFC <- output$log2FoldChange
+      output$fdr <- output$padj
+      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
+      write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
+      filt.out <- na.omit(output[output$fdr < 0.05,])
+      if (nrow(filt.out) > 2) {
+      	 subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
+      	 gnames <- filt.out[c('ensembl','symbol')]
+      	 s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
+      	 STREE <- hclust(dist(t(subset)))
+      	 zscores <- scale(t(subset))
+      	 ngenes <- length(colnames(zscores))
+      	 textscale <- (1/(ngenes/30))
+      	 if (textscale > 1) {
+            textscale <-1
+      	 }
+      	 if (textscale < 0.1) {
+            textscale <- 0.1
+      	 }
+      	 png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
+      	 heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+      	 legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
+      	 dev.off()
+      }
+    }
+  }
+}
+###################### Run EdgeR ########################
+###################### Run QuSage ######################
+MSIG.geneSets <- read.gmt('geneset.gmt')
+design <- model.matrix(~grps)
+d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
+d <- calcNormFactors(d)
+d <- estimateCommonDisp(d)
+png(file="mds.png",bg ="transparent",height=768,width=1024)
+plotMDS(d, labels=grps,col=col.blocks)
+legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
+dev.off()
+cond <-levels(d$samples$group)
+colnames(design) <- levels(d$samples$group)
+a <- length(cond)-1
+for (i in 1:a) {
+  for (j in 2:length(cond)) {
+    if (i == j) {
+      next
+    } else {
+      c <- exactTest(d, pair=c(cond[j],cond[i]))
+      res <- c$table
+      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
+      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
+      output$rawP <- output$PValue
+      output$logFC <- output$logFC
+      output$fdr <- p.adjust(output$rawP, method ='fdr')
+      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
+      write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
+      filt.out <- na.omit(output[output$fdr < 0.05,])
+      if (nrow(filt.out) > 2) {
+      subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
+      gnames <- filt.out[c('ensembl','symbol')]
+      s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
+      STREE <- hclust(dist(t(subset)))
+      zscores <- scale(t(subset))
+      ngenes <- length(colnames(zscores))
+      textscale <- (1/(ngenes/30))
+      if (textscale > 1) {
+        textscale <-1
+      }
+      if (textscale < 0.1) {
+        textscale <- 0.1
+      }
+      png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
+      heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+      legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
+      dev.off()
+      gcont <- paste(cond[j],cond[i],sep='-')
+      qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
+      save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
+      }
+      }
+  }
+}
+###################### Run Limma VOOM ########################
+# design <- model.matrix(~0+grps)
+# colnames(design) <- grpnames
+# a <- length(cond)-1
+# design.pairs <- c()
+# k <- 0
+# for (i in 1:a) {
+#   for (j in 2:length(cond)) {
+#     if (i == j) {
+#       next
+#     } else {
+#     k <- k+1
+#     design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
+#     }
+#     }
+#     }
+#contrast.matrix <- makeContrasts(design.pairs,levels=design)
+#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
+#d <- calcNormFactors(d)
+#d <- estimateCommonDisp(d)
+# v <- voom(d,design,plot=TRUE)
+# fit <- lmFit(v,design)
+# fit2 <- contrasts.fit(fit, contrast.matrix)
+# fit2 <- eBayes(fit2)
+# comps <- colnames(fit2$coefficients)
+# for (i in 1:length(comps)) {
+#   res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
+#   res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
+#   output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
+#   output$rawP <- output$P.Value
+#   output$logFC <- output$adj.P.Val
+#   output$fdr <- p.adjust(output$rawP, method ='fdr')
+#   output$bonf <- p.adjust(output$rawP, method ='bonferroni')
+#   write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
+#   filt.out <- na.omit(output[output$fdr < 0.05,])
+#   if (nrow(filt.out) > 2) {
+#   subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
+#   gnames <- filt.out[c('ensembl','symbol')]
+#   s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
+#   STREE <- hclust(dist(t(subset)))
+#   zscores <- scale(t(subset))
+#   ngenes <- length(colnames(zscores))
+#   textscale <- (1/(ngenes/30))
+#   if (textscale > 1) {
+#      textscale <-1
+#   }
+#   if (textscale < 0.1) {
+#     textscale <- 0.1
+#   }
+#   png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
+#   heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+#   legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
+#   dev.off()
+#   }
+# }
--- a/genect_rnaseq/build_ballgown.R
+++ b/genect_rnaseq/build_ballgown.R
+library(ballgown)
+args<-commandArgs(TRUE)
+sample_dirs <- args
+samples <- gsub('_stringtie','',sample_dirs)
+design <- "design.txt"
+samtbl <- read.table(file=design,header=TRUE,sep='\t')
+bg <- ballgown(samples=sample_dirs, meas='all')
+samples <- gsub('_stringtie','',sampleNames(bg))
+mergetbl <- merge(as.data.frame(samples),samtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
+pData(bg) = data.frame(id=samples, group=as.character(mergetbl$SampleGroup))
+save(bg,file="bg.rda")
--- a/genect_rnaseq/concat_edgeR.pl
+++ b/genect_rnaseq/concat_edgeR.pl
+#!/usr/bin/perl -w
+#concat_edgeR.pl
+#symbol	chrom	start	end	ensembl	type	logFC	logCPM	PValue	monocytes	neutrophils	rawP	fdr	bonf
+my @files = @ARGV;
+open OUT, ">edgeR.results.txt" or die $!;
+print OUT join("\t",'G1','G2','ENSEMBL','SYMBOL','TYPE','logFC','logCPM',
+	       'pval','fdr','G1.Mean','G2.Mean'),"\n";
+foreach $f (@files) {
+    open IN, "<$f" or die $!;
+    my ($fname,$ext) = split(/\.edgeR\./,$f);
+    $fname =~ s/edgeR\///;
+    my ($g1,$g2) = split(/_/,$fname);
+    my $head = <IN>;
+    chomp($head);
+    my @colnames = split(/\t/,$head);
+    while (my $line = <IN>) {
+	chomp($line);
+	my @row = split(/\t/,$line);
+	my %hash;
+	foreach my $i (0..$#row) {
+	    $hash{$colnames[$i]} = $row[$i];
+	}
+	print OUT join("\t",$g1,$g2,$hash{ensembl},$hash{symbol},$hash{type},
+		      sprintf("%.2f",$hash{logFC}),
+		       sprintf("%.2f",$hash{logCPM}),
+		       sprintf("%.1e",$hash{rawP}),sprintf("%.1e",$hash{fdr}),
+		       sprintf("%.0f",$hash{$g1}),sprintf("%.0f",$hash{$g2})),"\n";
+    }
+}
--- a/genect_rnaseq/dea.R
+++ b/genect_rnaseq/dea.R
+#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
+library(edgeR)
+library(DESeq2)
+library("RColorBrewer")
+library("gplots")
+library(qusage)
+rowMax <- function(x) apply(x,1,max)
+col.grp <- function(n,b) {
+  colorvec <- vector(mode="character", length=length(n))
+  plotcols <- rainbow(length(b))
+  for (i in 1:length(n)) {
+    for (j in 1:length(b)) {
+      if ( n[i] == b[j] ) {
+        colorvec[i] = plotcols[j]
+      }
+    }
+  }
+  c(colorvec)
+}
+#################### Read in Data ################################
+genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
+tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
+tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
+ct <- tbl[,4:length(tbl)]
+row.names(ct) <- tbl$ENSEMBL
+samples<- names(ct)
+dtbl <- read.table('design.txt',header=TRUE,sep="\t")
+samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
+grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
+samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
+colData <- samtbl[c('SampleGroup','SubjectID')]
+row.names(colData) <- samtbl$samples
+dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
+dds <- dds[ rowMax(counts(dds)) > 30, ]
+dds <- dds[ colSums(counts(dds)) > 1000000]
+if (nrow(counts(dds)) < 1) {
+print(paste("Samples are filtered if there is < 1M reads.  There are less than no remaining sample(s) after this filter",sep=' '))
+q()
+}
+countTable <- counts(dds)
+#grps <- as.character(levels(colData(dds)$SampleGroup))
+grps <- as.character(colData(dds)$SampleGroup)
+col.blocks <-col.grp(grps,levels(factor(grps)))
+libSizes <- as.vector(colSums(countTable))
+logcpm <- tbl2[,4:length(tbl2)]
+row.names(logcpm) <- tbl2$SYMBOL
+tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
+row.names(tmp.tab) <- tmp.tab$Group.1
+mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
+#################### Run DESEQ2 ################################
+dds <- DESeq(dds)
+rld <- rlogTransformation(dds, blind=TRUE)
+sampleDists <- dist(t(assay(rld)))
+png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
+heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+dev.off()
+#Compare Samples using PCA
+png(file="pca.png",bg ="transparent",height=768,width=1024)
+print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
+dev.off()
+#Do all pairwise comparisons
+contrast <- resultsNames(dds)
+cond<- levels(colData(dds)$SampleGroup)
+a <- length(cond)-1
+for (i in 1:a) {
+  for (j in 2:length(cond)) {
+    if (i == j) {
+      next
+    } else {
+      res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
+      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
+      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
+      output$rawP <- output$pvalue
+      output$logFC <- output$log2FoldChange
+      output$fdr <- output$padj
+      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
+      write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
+      filt.out <- na.omit(output[output$fdr < 0.05,])
+      if (nrow(filt.out) > 2) {
+      	 subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
+      	 gnames <- filt.out[c('ensembl','symbol')]
+      	 s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
+      	 STREE <- hclust(dist(t(subset)))
+      	 zscores <- scale(t(subset))
+      	 ngenes <- length(colnames(zscores))
+      	 textscale <- (1/(ngenes/30))
+      	 if (textscale > 1) {
+            textscale <-1
+      	 }
+      	 if (textscale < 0.1) {
+            textscale <- 0.1
+      	 }
+      	 png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
+      	 heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+      	 legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
+      	 dev.off()
+      }
+    }
+  }
+}
+###################### Run EdgeR ########################
+###################### Run QuSage ######################
+MSIG.geneSets <- read.gmt('geneset.gmt')
+design <- model.matrix(~grps)
+d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
+d <- calcNormFactors(d)
+d <- estimateCommonDisp(d)
+png(file="mds.png",bg ="transparent",height=768,width=1024)
+plotMDS(d, labels=grps,col=col.blocks)
+legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
+dev.off()
+cond <-levels(d$samples$group)
+colnames(design) <- levels(d$samples$group)
+a <- length(cond)-1
+for (i in 1:a) {
+  for (j in 2:length(cond)) {
+    if (i == j) {
+      next
+    } else {
+      c <- exactTest(d, pair=c(cond[j],cond[i]))
+      res <- c$table
+      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
+      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
+      output$rawP <- output$PValue
+      output$logFC <- output$logFC
+      output$fdr <- p.adjust(output$rawP, method ='fdr')
+      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
+      write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
+      filt.out <- na.omit(output[output$fdr < 0.05,])
+      if (nrow(filt.out) > 2) {
+      subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
+      gnames <- filt.out[c('ensembl','symbol')]
+      s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
+      STREE <- hclust(dist(t(subset)))
+      zscores <- scale(t(subset))
+      ngenes <- length(colnames(zscores))
+      textscale <- (1/(ngenes/30))
+      if (textscale > 1) {
+        textscale <-1
+      }
+      if (textscale < 0.1) {
+        textscale <- 0.1
+      }
+      png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
+      heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+      legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
+      dev.off()
+      gcont <- paste(cond[j],cond[i],sep='-')
+      qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
+      save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
+      }
+      }
+  }
+}
+###################### Run Limma VOOM ########################
+# design <- model.matrix(~0+grps)
+# colnames(design) <- grpnames
+# a <- length(cond)-1
+# design.pairs <- c()
+# k <- 0
+# for (i in 1:a) {
+#   for (j in 2:length(cond)) {
+#     if (i == j) {
+#       next
+#     } else {
+#     k <- k+1
+#     design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
+#     }
+#     }
+#     }
+#contrast.matrix <- makeContrasts(design.pairs,levels=design)
+#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
+#d <- calcNormFactors(d)
+#d <- estimateCommonDisp(d)
+# v <- voom(d,design,plot=TRUE)
+# fit <- lmFit(v,design)
+# fit2 <- contrasts.fit(fit, contrast.matrix)
+# fit2 <- eBayes(fit2)
+# comps <- colnames(fit2$coefficients)
+# for (i in 1:length(comps)) {
+#   res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
+#   res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
+#   output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
+#   output$rawP <- output$P.Value
+#   output$logFC <- output$adj.P.Val
+#   output$fdr <- p.adjust(output$rawP, method ='fdr')
+#   output$bonf <- p.adjust(output$rawP, method ='bonferroni')
+#   write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
+#   filt.out <- na.omit(output[output$fdr < 0.05,])
+#   if (nrow(filt.out) > 2) {
+#   subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
+#   gnames <- filt.out[c('ensembl','symbol')]
+#   s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
+#   STREE <- hclust(dist(t(subset)))
+#   zscores <- scale(t(subset))
+#   ngenes <- length(colnames(zscores))
+#   textscale <- (1/(ngenes/30))
+#   if (textscale > 1) {
+#      textscale <-1
+#   }
+#   if (textscale < 0.1) {
+#     textscale <- 0.1
+#   }
+#   png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
+#   heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
+#   legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
+#   dev.off()
+#   }
+# }
--- a/genect_rnaseq/gencode_genename.pl
+++ b/genect_rnaseq/gencode_genename.pl
+#!/usr/bin/perl -w
+#parse_gencode.pl
+open OUT, ">genenames.txt" or die $!;
+print OUT join("\t",'chrom','start','end','ensembl','symbol','type'),"\n";
+my $gtf = shift @ARGV;
+open GCODE, "<$gtf" or die $!;
+while (my $line = <GCODE>) {
+    chomp($line);
+    next if ($line =~ m/^#/);
+    my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) = 
+	split(/\t/,$line);
+    next unless ($feature eq 'gene');
+    $info =~ s/\"//g;
+    my %hash;
+    foreach $a (split(/;\s*/,$info)) {
+	my ($key,$val) = split(/ /,$a);
+	$hash{$key} = $val;
+    }
+    $hash{gene_id} =~ s/\.\d+//;
+    print OUT join("\t",$chrom,$start,$end,$hash{gene_id},$hash{gene_name},$hash{gene_type}),"\n";
+}
--- a/genect_rnaseq/geneabundance.sh
+++ b/genect_rnaseq/geneabundance.sh
+#!/bin/bash
+#rnaseqalign.sh
+usage() {
+  echo "-h Help documentation for hisat.sh"
+  echo "-b  --BAM File"
+  echo "-g  --GTF File"
+  echo "-s  --stranded"
+  echo "-p  --Prefix for output file name"
+  echo "Example: bash geneabudance.sh genect_rnaseq/geneabundance.sh -s stranded -g gtf_file -p pair_id -b bam"
+  exit 1
+}
+OPTIND=1 # Reset OPTIND
+while getopts :b:g:p:s:h opt
+do
+    case $opt in
+        b) sbam=$OPTARG;;
+        g) gtf=$OPTARG;;
+        p) pair_id=$OPTARG;;
+        s) stranded=$OPTARG;;
+        h) usage;;
+    esac
+done
+shift $(($OPTIND -1))
+# Check for mandatory options
+if [[ -z $pair_id ]] || [[ -z $sbam ]]
+then
+    usage
+fi
+if [[ -z $SLURM_CPUS_ON_NODE ]]
+then
+    SLURM_CPUS_ON_NODE=1
+fi
+if [[ $SLURM_CPUS_ON_NODE > 64 ]]
+then
+    SLURM_CPUS_ON_NODE=64
+fi
+source /etc/profile.d/modules.sh
+module load subread/1.6.1 stringtie/1.3.2d-intel
+featureCounts -s $stranded -M --fraction -J --ignoreDup -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
+mkdir ${pair_id}_stringtie
+cd ${pair_id}_stringtie
+stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
--- a/genect_rnaseq/statanal.sh
+++ b/genect_rnaseq/statanal.sh
@@ -17,6 +17,7 @@ do
 done
 shift $(($OPTIND -1))
+baseDir="`dirname \"$0\"`"
 # Check for mandatory options
 if [[ -z $SLURM_CPUS_ON_NODE ]]
@@ -25,9 +26,9 @@ then
 fi
 source /etc/profile.d/modules.sh
-perl $baseDir/scripts/concat_cts.pl -o ./ *.cts
+perl $baseDir/concat_cts.pl -o ./ *.cts
-perl $baseDir/scripts/concat_fpkm.pl -o ./ *.fpkm.txt
+perl $baseDir/concat_fpkm.pl -o ./ *.fpkm.txt
-perl $baseDir/scripts/concat_ctsum.pl -o ./ *.cts.summary
+perl $baseDir/concat_ctsum.pl -o ./ *.cts.summary
 cp design.txt design.shiny.txt
 cp geneset.gmt geneset.shiny.gmt
@@ -37,7 +38,7 @@ then
    touch bg.rda
 else
    module load R/3.2.1-intel
-    Rscript  $baseDir/scripts/dea.R
+    Rscript  $baseDir/dea.R
-    Rscript $baseDir/scripts/build_ballgown.R *_stringtie
+    Rscript $baseDir/build_ballgown.R *_stringtie
-    perl $baseDir/scripts/concat_edgeR.pl *.edgeR.txt
+    perl $baseDir/concat_edgeR.pl *.edgeR.txt
 fi